#! /bin/sh
# ******************************************************************************
# elemcvt: convert NIST "Linear ASCII" table of elements to units(1) format
# Usage: elemcvt [options] [<file>]
# Author: Jeff Conrad
# Date: 2024-01-06
# ******************************************************************************

# Adjust PATH to suit.
# For Windows w/MKS Toolkit, this assumes /bin is a symbolic link to
# $ROOTDIR/mksnt.
12
PATH=/bin

# Program name for messages: $0 with any leading directory (either slash
# style) and a trailing ".sh" removed.
progname=${0##*[/\\]}
progname=${progname%.sh}
export TITLEBAR=$progname

umsg="Usage: $progname [options] [file]
Options:
 -d Show elements for which no standard atomic mass is given and exit
 -v Verbose"

show_no_std_atomic_mass= # show elements for which no std atomic mass is given
verbose=
errors=
DUALCASE=1 # used in MKS Toolkit to make options case sensitive

# Diagnostics go to standard error through printf rather than the ksh-only
# "print" builtin, so the script also runs under a plain POSIX /bin/sh.
while getopts :dv arg
do
    case $arg in
    d)
        show_no_std_atomic_mass=YES ;;
    v)
        verbose=YES ;;
    :)
        # OPTARG contains the option missing the argument
        printf '%s\n' "$progname: option $OPTARG requires an argument" >&2
        errors=YES
        ;;
    [?])
        # OPTARG contains the invalid option
        printf '%s\n' "$progname: unknown option $OPTARG" >&2
        errors=YES
        ;;
    esac
done
shift $((OPTIND - 1))
unset DUALCASE

# Any option error: show the usage summary and give up.
if [ -n "$errors" ]
then
    printf '%s\n' "$umsg" >&2
    exit 1
fi
56
57awk '
58
# Print the comment line that heads an element:
#   # <name>: <symbol> (<atomic number>)
# with " std atomic weight: <value>" appended when std_atomic_mass_str
# tests true.  The name is looked up in the global names array.
function show_element_info(atomic_number, atomic_symbol, std_atomic_mass_str)
{
    printf("# %s: %s (%d)%s\n", names[atomic_number], atomic_symbol, atomic_number,
           std_atomic_mass_str ? (" std atomic weight: " std_atomic_mass_str) : "")
}
66
# Print one isotope definition line:
#   <name>_<mass num> <mass> # <mole fraction>
# num is the mass number; it indexes the global mass and composition
# arrays.  The trailing mole-fraction comment is written only when
# composition[num] tests true.  Column widths come from the globals
# max_isotope_len, sepwid, isoprecis and compprecis.
function show_isotope(name, num)
{
    printf("%-*s%*s%*.*f%s\n", max_isotope_len, sprintf("%s_%d", name, num),
           sepwid, " ", isoprecis + 4, isoprecis, mass[num],
           composition[num] ? sprintf(" # %.*f", compprecis, composition[num]) : "")
}
76
# Start an element definition: the name, left-justified in a column of
# max_name_len characters, followed by the column gap.  No newline is
# written; the caller continues the same output line.
function show_element_name(name)
{
    printf("%-*s", max_name_len, name)    # name column
    printf("%*s", sepwid, " ")            # gap of sepwid characters
}
81
# Print one term of the atomic-mass sum, without a newline:
#   <mole fraction> <name>_<mass num>
# names is the name array to use and mass_num the mass number of the
# isotope; mass_wid is a local.  When the mole fraction equals 1 the
# factor is left out and padding is written in its place.
function mole_fraction(atomic_number, names, mass_num, mass_wid)
{
    if (composition[mass_num] != 1) {
        printf("%.*f %s_%d", compprecis, composition[mass_num],
               names[atomic_number], mass_num)
        return
    }

    # width in digits of the last mass number read for this element
    mass_wid = length(int(mass_number[n_isotopes]))
    # align with 1st digit of atomic mass
    printf("%*s%s_%d", sepwid + 2 - mass_wid, " ", names[atomic_number], mass_num)
}
94
# End the current output line with a backslash continuation, then indent
# the next line and write a plus sign ready for the following term.
function add_continuation()
{
    printf(" \\\n%-*s+ ", max_name_len + sepwid - 2, " ")
}
101
# Print, without a newline, a reference to the most stable isotope:
#   <name>_<mass num> # most stable
# mass is the mass number to reference (inside this function the parameter
# hides the global mass array); mass_wid is a local.  The name always comes
# from the global names array.
function use_most_stable(atomic_number, mass, mass_wid)
{
    # width in digits of the last mass number read for this element
    mass_wid = length(int(mass_number[n_isotopes]))

    # leading pad, computed the same way as in mole_fraction
    printf("%*s", sepwid + 2 - mass_wid, " ")
    printf("%-*s # most stable", isoprecis + 1 + mass_wid,
           sprintf("%s_%d", names[atomic_number], mass))
}
109
# Print, without a newline, a reference built from the standard atomic mass:
#   <name>_<mass num> # standard atomic mass
# mass is the value whose integer part (via %d) forms the suffix; inside
# this function the parameter hides the global mass array.  mass_wid is a
# local.  The name always comes from the global names array.
function use_std_mass(atomic_number, mass, mass_wid)
{
    # width in digits of the last mass number read for this element
    mass_wid = length(int(mass_number[n_isotopes]))

    # leading pad, computed the same way as in mole_fraction
    printf("%*s", sepwid + 2 - mass_wid, " ")
    printf("%-*s # standard atomic mass", isoprecis + 1 + mass_wid,
           sprintf("%s_%d", names[atomic_number], mass))
}
117
# Show the isotopes of one element, then the element definition itself as
# the sum of mole fraction-mass products.
#
#   atomic_number   index into names
#   names           array of element names to use (IUPAC or American)
#   ndx, mass_num, n_terms   locals
#
# Reads the globals n_isotopes, mass_number, composition,
# total_composition, most_stable_mass and std_atomic_mass.  Sets the global
# show_element (1 if an element definition was written, else 0), which
# output() tests afterwards.
function show_element_data(atomic_number, names,    ndx, mass_num, n_terms)
{
    # isotopes and relative abundances
    for (ndx = 1; ndx <= n_isotopes; ndx++)
        show_isotope(names[atomic_number], mass_number[ndx])

    # show a value for atomic mass if one of these is available;
    # otherwise, show only isotope masses.
    show_element = (total_composition > 0 || most_stable_mass || std_atomic_mass) ? 1 : 0

    # element name
    if (show_element)
        show_element_name(names[atomic_number])

    # atomic mass: one term per isotope that has a mole fraction; every
    # term after the first starts on a continuation line
    n_terms = 0
    for (ndx = 1; ndx <= n_isotopes; ndx++) {
        mass_num = mass_number[ndx]
        if (composition[mass_num] > 0) {
            if (n_terms > 0)
                add_continuation()
            mole_fraction(atomic_number, names, mass_num)
            n_terms++
        }
    }

    # options if mole fraction is not given for any isotope
    if (total_composition == 0) {
        if (most_stable_mass)
            use_most_stable(atomic_number, most_stable_mass)
        else if (std_atomic_mass)
            use_std_mass(atomic_number, std_atomic_mass)
    }

    # End the definition line exactly once, after everything that belongs
    # on it.  Ending it earlier for an element with a single isotope and
    # no mole fraction would leave the name on one line and its value on
    # the next.
    print ""
}
180
# Write everything for the element whose records have just been read: the
# heading comment, the isotope lines and the element definition, repeated
# under the American spelling when am_names has an entry.  Also sets the
# global column parameters and clears the per-element accumulators
# most_stable_mass and total_composition.
function output(atomic_number)
{
    sepwid = 5          # width of column separation
    compprecis = 8      # for mole fraction
    isoprecis = 10      # for isotope mass

    # NIST show H, D, and T
    if (atomic_number == 1)
        atomic_symbol = "H"

    show_element_info(atomic_number, atomic_symbol, std_atomic_mass_str)

    if (am_names[atomic_number]) {
        # American spelling differs from IUPAC: show both
        print "# IUPAC spelling"
        show_element_data(atomic_number, names)
        print "# American spelling"
        show_element_data(atomic_number, am_names)
    }
    else
        show_element_data(atomic_number, names)

    # blank line between elements
    if (show_element)
        print ""

    most_stable_mass = total_composition = 0
}
209
# Print the file banner and GPL notice as comment lines, followed by one
# blank line.
function gnu_notes()
{
    print "# This file is the elements database for use with GNU units, a units"
    print "# conversion program by Adrian Mariano adrianm@gnu.org"
    print "#"
    print "# January 2024 Version 1.0"
    print "#"
    print "# Copyright (C) 2024"
    print "# Free Software Foundation, Inc"
    print "#"
    print "# This program is free software; you can redistribute it and/or modify"
    print "# it under the terms of the GNU General Public License as published by"
    print "# the Free Software Foundation; either version 3 of the License, or"
    print "# (at your option) any later version."
    print "#"
    print "# This data is distributed in the hope that it will be useful,"
    print "# but WITHOUT ANY WARRANTY; without even the implied warranty of"
    print "# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"
    print "# GNU General Public License for more details."
    print "#"
    print "# You should have received a copy of the GNU General Public License"
    print "# along with this program; if not, write to the Free Software"
    print "# Foundation, Inc., 51 Franklin Street, Fifth Floor,"
    print "# Boston, MA 02110-1301 USA"
    print ""
}
236
# Print the source URL of the data and the NIST explanation of the
# standard atomic weight notation as comment lines; each of the two parts
# is followed by one blank line.
function nist_notes()
{
    print "# From https://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-relative-atomic-masses"
    print ""

    # notes from https://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-column-descriptions
    print "# For several elements, the standard atomic weight A_r is given as an"
    print "# atomic-weight interval with the symbol [a,b] to denote the set of"
    print "# atomic-weight values in normal materials; thus, [a <= A_r(E) <= b]."
    print "# The symbols a and b denote the lower and upper bounds of the"
    print "# interval [a,b], respectively. The values in parentheses, following"
    print "# the last significant digit to which they are attributed, are"
    print "# uncertainties."
    print "#"
    print "# Brackets [ ] enclosing a single value indicate the mass number of"
    print "# the most stable isotope. For radioactive elements with atomic"
    print "# numbers 95 or greater, the mass number of the most stable isotope is"
    print "# not specified, as the list of studied isotopes is still"
    print "# incomplete."
    print ""
}
257
# Print, as comment lines followed by one blank line, the explanation of
# how the atomic mass of each element is defined in the generated file.
# One print statement per line keeps every string literal on a single
# source line, since not every awk accepts a backslash-newline inside a
# string.
function units_notes()
{
    print "# When composition mole fractions of isotopes are given, the atomic mass"
    print "# of an element is given as the sum of the product(s) of mole"
    print "# fraction(s) and the atomic masses of the relevant isotopes. When composition"
    print "# mole fractions are not given, the atomic mass is given as"
    print "#"
    print "# * the mass of the most stable isotope, if available, or"
    print "# * the standard atomic mass of the element, if available."
    print "#"
    print "# If neither the most stable isotope nor a standard atomic mass is"
    print "# available, no atomic mass for the element is given; the user must"
    print "# select the isotope most suitable for their purposes."
    print "#"
    print "# If the standard atomic mass is a range, the value given is the"
    print "# midpoint of that range, which may differ from the value determined"
    print "# from the sum of the products of composition mole fraction and isotope"
    print "# atomic mass."
    print ""
}
278
# Set-up before any input is read: field separator, option flags taken
# from the shell, the element name tables, column widths derived from the
# longest name, and (unless only listing elements without a standard
# atomic mass) the explanatory notes at the top of the output.
BEGIN {
    FS = " *= *"

    # option flags: the enclosing shell script splices its variables in here
    show_no_std_atomic_mass = "'"$show_no_std_atomic_mass"'"
    verbose = "'"$verbose"'"
    console = "/dev/console"

    # IUPAC spellings, in order of atomic number: names[1] .. names[118]
    split("hydrogen helium lithium beryllium boron carbon nitrogen oxygen fluorine neon " \
          "sodium magnesium aluminium silicon phosphorus sulfur chlorine argon potassium calcium " \
          "scandium titanium vanadium chromium manganese iron cobalt nickel copper zinc " \
          "gallium germanium arsenic selenium bromine krypton rubidium strontium yttrium zirconium " \
          "niobium molybdenum technetium ruthenium rhodium palladium silver cadmium indium tin " \
          "antimony tellurium iodine xenon caesium barium lanthanum cerium praseodymium neodymium " \
          "promethium samarium europium gadolinium terbium dysprosium holmium erbium thulium ytterbium " \
          "lutetium hafnium tantalum tungsten rhenium osmium iridium platinum gold mercury " \
          "thallium lead bismuth polonium astatine radon francium radium actinium thorium " \
          "protactinium uranium neptunium plutonium americium curium berkelium californium einsteinium fermium " \
          "mendelevium nobelium lawrencium rutherfordium dubnium seaborgium bohrium hassium meitnerium darmstadtium " \
          "roentgenium copernicium nihonium flerovium moscovium livermorium tennessine oganesson",
          names, " ")

    # American spellings
    am_names[13] = "aluminum"
    am_names[55] = "cesium"

    # length of longest element name; the first name of that length wins
    max_name_len = 0
    for (i = 1; i <= 118; i++) {
        len = length(names[i])
        if (len <= max_name_len)
            continue
        max_name_len = len
        longestname = names[i]
    }
    max_isotope_len = max_name_len + 4    # allow for "_xxx" suffix

    if (! show_no_std_atomic_mass) {
        gnu_notes()
        nist_notes()
        units_notes()
    }

    if (verbose)
        printf("Longest element name: %s (%d)\n\n", longestname, max_name_len)

    n_isotopes = 0
    mass_number[1] = 0
}
431
# begin file processing

# skip JavaScript and HTML before data: everything from the first line
# through the line containing the opening pre tag
NR == 1, /<pre/ { next }
# skip HTML after data: stop reading at the closing pre tag
/<\/pre>/ { exit }

# remove unpaddable spaces, then trailing spaces; the record is re-split
# into fields afterwards
{
    gsub(/&nbsp;/, "", $0)
    sub(/ +$/, "", $0)
}
444
# "Atomic Number" line: remember the previous atomic number and, when the
# number has changed to something above 1, flush the element just finished.
# With show_no_std_atomic_mass set, flushing means printing only the names
# of elements that have no standard atomic weight string.
index($1, "Atomic Number") {
    last_atomic_number = atomic_number
    atomic_number = $2 + 0

    if (atomic_number != last_atomic_number && atomic_number > 1) {
        if (! show_no_std_atomic_mass)
            output(last_atomic_number)
        else if (! std_atomic_mass_str)
            print names[last_atomic_number]
    }
}
457
# "Atomic Symbol" line: keep the symbol for the element heading
index($1, "Atomic Symbol") { atomic_symbol = $2 }
461
# "Mass Number" line: on the first isotope of a new element, empty the
# list of mass numbers; then append this isotope to it
index($1, "Mass Number") {
    if (atomic_number != last_atomic_number) {
        while (n_isotopes > 0)
            delete mass_number[n_isotopes--]
    }
    n_isotopes++
    mass_number[n_isotopes] = $2
}
470
# "Relative Atomic Mass" line: drop the parenthesized digits (or #) that
# follow the value and store the mass under the current mass number
index($1, "Relative Atomic Mass") {
    atomic_mass = $2
    sub(/\([[:digit:]#]+\)/, "", atomic_mass)

    mass[mass_number[n_isotopes]] = atomic_mass
}
476
# "Isotopic Composition" line: drop the parenthesized digits (or #) that
# follow the value, store the mole fraction under the current mass number
# and add it to the running total for the element
index($1, "Isotopic Composition") {
    isotopic_composition = $2
    sub(/\([[:digit:]#]+\)/, "", isotopic_composition)

    composition[mass_number[n_isotopes]] = isotopic_composition
    total_composition += isotopic_composition
}
483
# "Standard Atomic Weight" line: keep the text as given for the heading,
# and derive a numeric value from it.  Parenthesized parts and square
# brackets are removed; a comma-separated pair is replaced by its midpoint.
# A single bracketed number in the original text is also recorded as the
# most stable mass.
index($1, "Standard Atomic Weight") {
    std_atomic_mass_str = $2
    std_atomic_mass = std_atomic_mass_str

    gsub(/\([^)]+\)/, "", std_atomic_mass)
    gsub(/[][]/, "", std_atomic_mass)

    # range [a,b]: use the midpoint
    if (std_atomic_mass ~ /,/) {
        split(std_atomic_mass, range, /,/)
        std_atomic_mass = (range[1] + range[2]) / 2
    }

    if (std_atomic_mass_str ~ /\[[[:digit:].]+\]/)
        most_stable_mass = std_atomic_mass

    # from here on the current element counts as already seen
    last_atomic_number = atomic_number
}
496
497END {
498 if (show_no_std_atomic_mass) {
499 if (! std_atomic_mass_str)
500 print names[last_atomic_number]
501 }
502 else
503 output(last_atomic_number)
504} ' $*