Initial commit
diff --git a/elemcvt.sh b/elemcvt.sh
new file mode 100755
index 0000000..98e8d7f
--- /dev/null
+++ b/elemcvt.sh
@@ -0,0 +1,504 @@
+#! /bin/sh
+# ******************************************************************************
+# elemcvt: convert NIST "Linear ASCII" table of elements to units(1) format
+# Usage: elemcvt [options] [<file>]
+# Author: Jeff Conrad
+# Date: 2024-01-06
+# ******************************************************************************
+
+# Adjust PATH to suit.
+# For Windows w/MKS Toolkit, this assumes /bin is a symbolic link to
+# $ROOTDIR/mksnt.
+
+PATH=/bin
+
+progname=${0##*[/\\]} # strip any leading directory, whether separated by / or by \
+progname=${progname%.sh} # drop a trailing ".sh"
+export TITLEBAR=$progname # NOTE(review): presumably read by the MKS environment as a window title -- confirm
+
+umsg="Usage: $progname [options] [file]
+Options:
+ -d Show elements for which no standard atomic mass is given and exit
+ -v Verbose"
+
+show_no_std_atomic_mass= # show elements for which no std atomic mass is given
+verbose=
+errors=
+DUALCASE=1 # used in MKS Toolkit to make options case sensitive
+
+while getopts :dv arg
+do
+ case $arg in
+ d)
+ show_no_std_atomic_mass=YES ;;
+ v)
+ verbose=YES ;;
+ :)
+ # OPTARG contains the option missing the argument
+ print -ru2 -- "$progname: option $OPTARG requires an argument"
+ errors=YES
+ ;;
+ [?])
+ # OPTARG contains the invalid option
+ print -ru2 -- "$progname: unknown option $OPTARG"
+ errors=YES
+ ;;
+ esac
+done
+shift $((OPTIND - 1))
+unset DUALCASE
+
+if [ -n "$errors" ]
+then
+ print -ru2 -- "$umsg"
+ exit 1
+fi
+
+awk '
+
+# Print the heading comment "# <name>: <symbol> (<Z>)", plus the std atomic weight text if set.
+function show_element_info(atomic_number, atomic_symbol, std_atomic_mass_str,    line) {
+    line = sprintf("# %s: %s (%d)", names[atomic_number], atomic_symbol, atomic_number)
+    if (std_atomic_mass_str)
+        line = line " std atomic weight: " std_atomic_mass_str
+    print line
+}
+
+# Print one isotope line: <name>_<mass num> <mass> # <mole fraction>
+# (the trailing mole-fraction comment only when a composition is recorded).
+function show_isotope(name, num,    label) {
+    label = sprintf("%s_%d", name, num)
+    printf("%-*s%*s%*.*f", max_isotope_len, label, sepwid, " ",
+           isoprecis + 4, isoprecis, mass[num])
+    if (composition[num]) printf(" # %.*f", compprecis, composition[num])
+    print ""
+}
+
+# Print the element name padded to the longest name, then the column gap.
+function show_element_name(name) {
+    printf "%-*s%*s", max_name_len, name, sepwid, " "
+}
+
+# Print one term of the atomic-mass sum: <mole fraction> <name>_<mass num>.
+# A mole fraction equal to 1 is left out; the isotope name is then indented
+# so that it aligns with the 1st digit of the atomic mass.
+function mole_fraction(atomic_number, names, mass_num,    width) {
+    if (composition[mass_num] != 1) {
+        printf("%.*f %s_%d", compprecis, composition[mass_num],
+               names[atomic_number], mass_num)
+        return
+    }
+    width = length(int(mass_number[n_isotopes]))
+    printf("%*s%s_%d", sepwid + 2 - width, " ", names[atomic_number], mass_num)
+}
+
+# Continue the sum on a new line: end the current line with a backslash and
+# start the next one with a "+ " that ends where the value column begins.
+function add_continuation()
+{
+    printf(" \\\n%-*s+ ", max_name_len + sepwid - 2, " ")
+}
+
+# Print "<name>_<mass num>" (IUPAC name) followed by the note "# most stable".
+function use_most_stable(atomic_number, mass,    width, isotope) {
+    width = length(int(mass_number[n_isotopes]))
+    isotope = sprintf("%s_%d", names[atomic_number], mass)
+    printf("%*s%-*s # most stable", sepwid + 2 - width, " ",
+           isoprecis + 1 + width, isotope)
+}
+
+# Print "<name>_<N>", N being mass formatted with %d, and the note "# standard atomic mass".
+function use_std_mass(atomic_number, mass,    width, isotope) {
+    width = length(int(mass_number[n_isotopes]))
+    isotope = sprintf("%s_%d", names[atomic_number], mass)
+    printf("%*s%-*s # standard atomic mass", sepwid + 2 - width, " ",
+           isoprecis + 1 + width, isotope)
+}
+
+# Emit the units(1) definitions for one element, spelled as in names[]:
+#
+#   1. one line per isotope, via show_isotope();
+#   2. the element itself, defined as the sum of mole fraction-mass
+#      products over all isotopes that have a mole fraction, one term per
+#      line, with a continuation line for every term after the first.
+#
+# If no isotope has a mole fraction, the element is defined instead as
+#
+#   * its most stable isotope, if known, or
+#   * the isotope name formed from its standard atomic mass, if known.
+#
+# If none of these is available only the isotope lines are shown, followed
+# by a blank line.
+#
+# Globals read:  mass_number[], n_isotopes, composition[], total_composition,
+#                most_stable_mass, std_atomic_mass
+# Globals set:   show_element (1 if an element definition was printed, else
+#                0); output() tests it to decide on a separating blank line
+# Locals:        ndx, mass_num, n_terms
+#
+# All isotopes go through the same loop, and exactly one newline ends the
+# definition, after the last term or the fallback value, for any n_isotopes.
+function show_element_data(atomic_number, names,    ndx, mass_num, n_terms)
+{
+    # isotopes and relative abundances
+    for (ndx = 1; ndx <= n_isotopes; ndx++)
+        show_isotope(names[atomic_number], mass_number[ndx])
+
+    # show a value for atomic mass if one of these is available;
+    # otherwise, show only isotope masses.
+    if (total_composition > 0 || most_stable_mass || std_atomic_mass)
+        show_element = 1
+    else
+        show_element = 0
+
+    # element name
+    if (show_element)
+        show_element_name(names[atomic_number])
+
+    # atomic mass: sum of mole fraction-mass products; every term after
+    # the first goes on its own continuation line
+    n_terms = 0
+    for (ndx = 1; ndx <= n_isotopes; ndx++) {
+        mass_num = mass_number[ndx]
+        if (composition[mass_num] > 0) {
+            if (n_terms++ > 0)
+                add_continuation()
+            mole_fraction(atomic_number, names, mass_num)
+        }
+    }
+
+    # options if mole fraction is not given for any isotope
+    if (total_composition == 0) {
+        if (most_stable_mass)
+            use_most_stable(atomic_number, most_stable_mass)
+        else if (std_atomic_mass)
+            use_std_mass(atomic_number, std_atomic_mass)
+    }
+
+    print ""
+}
+
+# Print everything for one element: the "# name: symbol (Z)" heading, then
+# the isotope and element definitions (a second time with the American
+# spelling when one exists), and finally reset the per-element totals.
+function output(atomic_number)
+{
+    sepwid = 5          # width of column separation
+    compprecis = 8      # for mole fraction
+    isoprecis = 10      # for isotope mass
+
+    # NIST shows H, D, and T; the element heading always uses H
+    if (atomic_number == 1)
+        atomic_symbol = "H"
+    show_element_info(atomic_number, atomic_symbol, std_atomic_mass_str)
+
+    if (!am_names[atomic_number])
+        show_element_data(atomic_number, names)
+    else {
+        print "# IUPAC spelling"
+        show_element_data(atomic_number, names)
+        # show American spelling if different from IUPAC
+        print "# American spelling"
+        show_element_data(atomic_number, am_names)
+    }
+
+    if (show_element)
+        print ""        # blank line between elements
+    most_stable_mass = total_composition = 0
+}
+
+# Print the file header (description, version, copyright, GPL notice) and a blank line.
+function gnu_notes() {
+    print "# This file is the elements database for use with GNU units, a units"
+    print "# conversion program by Adrian Mariano adrianm@gnu.org"
+    print "#"
+    print "# January 2024 Version 1.0"
+    print "#"
+    print "# Copyright (C) 2024"
+    print "# Free Software Foundation, Inc"
+    print "#"
+    print "# This program is free software; you can redistribute it and/or modify"
+    print "# it under the terms of the GNU General Public License as published by"
+    print "# the Free Software Foundation; either version 3 of the License, or"
+    print "# (at your option) any later version."
+    print "#"
+    print "# This data is distributed in the hope that it will be useful,"
+    print "# but WITHOUT ANY WARRANTY; without even the implied warranty of"
+    print "# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"
+    print "# GNU General Public License for more details."
+    print "#"
+    print "# You should have received a copy of the GNU General Public License"
+    print "# along with this program; if not, write to the Free Software"
+    print "# Foundation, Inc., 51 Franklin Street, Fifth Floor,"
+    print "# Boston, MA 02110-1301 USA"
+    print ""
+}
+
+# Print the data source URL and the NIST column notes, each followed by a blank line.
+function nist_notes() {
+    print "# From https://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-relative-atomic-masses"
+    print ""
+    # notes from https://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-column-descriptions
+    print "# For several elements, the standard atomic weight A_r is given as an"
+    print "# atomic-weight interval with the symbol [a,b] to denote the set of"
+    print "# atomic-weight values in normal materials; thus, [a <= A_r(E) <= b]."
+    print "# The symbols a and b denote the lower and upper bounds of the"
+    print "# interval [a,b], respectively. The values in parentheses, following"
+    print "# the last significant digit to which they are attributed, are"
+    print "# uncertainties."
+    print "#"
+    print "# Brackets [ ] enclosing a single value indicate the mass number of"
+    print "# the most stable isotope. For radioactive elements with atomic"
+    print "# numbers 95 or greater, the mass number of the most stable isotope is"
+    print "# not specified, as the list of studied isotopes is still"
+    print "# incomplete."
+    print ""
+}
+
+# Print the notes on how element masses are derived in this file, then a blank line.
+function units_notes() {
+    print "# When composition mole fractions of isotopes are given, the atomic mass"
+    print "# of an element is given as the sum of the product(s) of mole"
+    print "# fraction(s) and the atomic masses of the relevant isotopes. When composition"
+    print "# mole fractions are not given, the atomic mass is given as"
+    print "#"
+    print "# * the mass of the most stable isotope, if available, or"
+    print "# * the standard atomic mass of the element, if available."
+    print "#"
+    print "# If neither the most stable isotope nor a standard atomic mass is"
+    print "# available, no atomic mass for the element is given; the user must"
+    print "# select the isotope most suitable for their purposes."
+    print "#"
+    print "# If the standard atomic mass is a range, the value given is the"
+    print "# midpoint of that range, which may differ from the value determined"
+    print "# from the sum of the products of composition mole fraction and isotope"
+    print "# atomic mass."
+    print ""
+}
+
+BEGIN {
+ FS = " *= *" # split each line at the "=", ignoring spaces around it
+ show_no_std_atomic_mass = "'"$show_no_std_atomic_mass"'" # value spliced in by the shell: "YES" with -d, else empty
+ verbose = "'"$verbose"'" # value spliced in by the shell: "YES" with -v, else empty
+ console = "/dev/console" # NOTE(review): not referenced anywhere else in the visible script
+
+ # IUPAC spellings, indexed by atomic number
+ names[1] = "hydrogen"
+ names[2] = "helium"
+ names[3] = "lithium"
+ names[4] = "beryllium"
+ names[5] = "boron"
+ names[6] = "carbon"
+ names[7] = "nitrogen"
+ names[8] = "oxygen"
+ names[9] = "fluorine"
+ names[10] = "neon"
+ names[11] = "sodium"
+ names[12] = "magnesium"
+ names[13] = "aluminium"
+ names[14] = "silicon"
+ names[15] = "phosphorus"
+ names[16] = "sulfur"
+ names[17] = "chlorine"
+ names[18] = "argon"
+ names[19] = "potassium"
+ names[20] = "calcium"
+ names[21] = "scandium"
+ names[22] = "titanium"
+ names[23] = "vanadium"
+ names[24] = "chromium"
+ names[25] = "manganese"
+ names[26] = "iron"
+ names[27] = "cobalt"
+ names[28] = "nickel"
+ names[29] = "copper"
+ names[30] = "zinc"
+ names[31] = "gallium"
+ names[32] = "germanium"
+ names[33] = "arsenic"
+ names[34] = "selenium"
+ names[35] = "bromine"
+ names[36] = "krypton"
+ names[37] = "rubidium"
+ names[38] = "strontium"
+ names[39] = "yttrium"
+ names[40] = "zirconium"
+ names[41] = "niobium"
+ names[42] = "molybdenum"
+ names[43] = "technetium"
+ names[44] = "ruthenium"
+ names[45] = "rhodium"
+ names[46] = "palladium"
+ names[47] = "silver"
+ names[48] = "cadmium"
+ names[49] = "indium"
+ names[50] = "tin"
+ names[51] = "antimony"
+ names[52] = "tellurium"
+ names[53] = "iodine"
+ names[54] = "xenon"
+ names[55] = "caesium"
+ names[56] = "barium"
+ names[57] = "lanthanum"
+ names[58] = "cerium"
+ names[59] = "praseodymium"
+ names[60] = "neodymium"
+ names[61] = "promethium"
+ names[62] = "samarium"
+ names[63] = "europium"
+ names[64] = "gadolinium"
+ names[65] = "terbium"
+ names[66] = "dysprosium"
+ names[67] = "holmium"
+ names[68] = "erbium"
+ names[69] = "thulium"
+ names[70] = "ytterbium"
+ names[71] = "lutetium"
+ names[72] = "hafnium"
+ names[73] = "tantalum"
+ names[74] = "tungsten"
+ names[75] = "rhenium"
+ names[76] = "osmium"
+ names[77] = "iridium"
+ names[78] = "platinum"
+ names[79] = "gold"
+ names[80] = "mercury"
+ names[81] = "thallium"
+ names[82] = "lead"
+ names[83] = "bismuth"
+ names[84] = "polonium"
+ names[85] = "astatine"
+ names[86] = "radon"
+ names[87] = "francium"
+ names[88] = "radium"
+ names[89] = "actinium"
+ names[90] = "thorium"
+ names[91] = "protactinium"
+ names[92] = "uranium"
+ names[93] = "neptunium"
+ names[94] = "plutonium"
+ names[95] = "americium"
+ names[96] = "curium"
+ names[97] = "berkelium"
+ names[98] = "californium"
+ names[99] = "einsteinium"
+ names[100] = "fermium"
+ names[101] = "mendelevium"
+ names[102] = "nobelium"
+ names[103] = "lawrencium"
+ names[104] = "rutherfordium"
+ names[105] = "dubnium"
+ names[106] = "seaborgium"
+ names[107] = "bohrium"
+ names[108] = "hassium"
+ names[109] = "meitnerium"
+ names[110] = "darmstadtium"
+ names[111] = "roentgenium"
+ names[112] = "copernicium"
+ names[113] = "nihonium"
+ names[114] = "flerovium"
+ names[115] = "moscovium"
+ names[116] = "livermorium"
+ names[117] = "tennessine"
+ names[118] = "oganesson"
+
+ # American spellings, listed only for elements that also get an "American spelling" section
+ am_names[13] = "aluminum"
+ am_names[55] = "cesium"
+
+ max_name_len = 0 # length of longest element name
+ for (i = 1; i <= 118; i++) { # 118 is the highest index assigned in names[] above
+ len = length(names[i])
+ if (len > max_name_len) {
+ max_name_len = len;
+ longestname = names[i]
+ }
+ }
+ max_isotope_len = max_name_len + 4 # allow for "_xxx" suffix
+
+ if (! show_no_std_atomic_mass) { # the header notes are printed only when -d is not given
+ gnu_notes()
+ nist_notes()
+ units_notes()
+ }
+
+ if (verbose) # -v: report the longest name on standard output
+ printf("Longest element name: %s (%d)\n\n", longestname, max_name_len)
+
+ n_isotopes = 0 # no isotopes collected yet
+ mass_number[1] = 0
+}
+
+# begin file processing
+
+# Ignore everything from the first line through the line containing "<pre".
+NR == 1, /<pre/ { next }
+# Stop reading at the closing "</pre>" tag; the END action still runs.
+/<\/pre>/ { exit }
+
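+# remove unpaddable spaces, then trailing spaces. NOTE(review): the first pattern presumably holds a literal no-break space; an ordinary blank would delete every space and break the field tests below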
+{
+ gsub(/ /, "")
+ gsub(/ +$/, "")
+}
+
+# Atomic number of the record being read.  A change to a new number above 1
+# completes the previous element: print it, or with -d list only its name.
+$1 ~ /Atomic Number/ {
+    last_atomic_number = atomic_number
+    atomic_number = $2 + 0
+    if (atomic_number > 1 && atomic_number != last_atomic_number) {
+        if (!show_no_std_atomic_mass)
+            output(last_atomic_number)
+        else if (!std_atomic_mass_str)
+            print names[last_atomic_number]
+    }
+}
+
+# Remember the symbol from the record being read; output() passes it on
+# to the element heading.
+$1 ~ /Atomic Symbol/ { atomic_symbol = $2 }
+
+# Mass number of the current isotope.  When the atomic number has changed,
+# the isotope list collected so far is discarded before this one is recorded.
+$1 ~ /Mass Number/ {
+    if (atomic_number != last_atomic_number)
+        while (n_isotopes > 0)
+            delete mass_number[n_isotopes--]
+    mass_number[++n_isotopes] = $2
+}
+
+$1 ~ /Relative Atomic Mass/ { # mass of the isotope whose Mass Number line was read last
+ atomic_mass = $2
+ sub(/\([[:digit:]#]+\)/, "", atomic_mass) # strip the first parenthesized group of digits and "#" marks
+ mass[mass_number[n_isotopes]] = atomic_mass # indexed by mass number
+}
+
+$1 ~ /Isotopic Composition/ { # mole fraction of the isotope whose Mass Number line was read last
+ isotopic_composition = $2
+ sub(/\([[:digit:]#]+\)/, "", isotopic_composition) # strip the first parenthesized group of digits and "#" marks
+ composition[mass_number[n_isotopes]] = isotopic_composition # indexed by mass number
+ total_composition += isotopic_composition # running sum for the element; output() resets it to 0
+}
+
+$1 ~ /Standard Atomic Weight/ {
+ std_atomic_mass = std_atomic_mass_str = $2 # the _str copy keeps the text as read, for the heading comment
+ gsub(/\([^)]+\)/, "", std_atomic_mass) # drop every parenthesized group
+ gsub(/[][]/, "", std_atomic_mass) # drop the square brackets
+ if (std_atomic_mass ~ /,/) { # two comma-separated bounds: use their midpoint
+ split(std_atomic_mass, range, /,/)
+ std_atomic_mass = (range[1] + range[2]) / 2
+ }
+ if (std_atomic_mass_str ~ /\[[[:digit:].]+\]/) # a single bracketed value
+ most_stable_mass = std_atomic_mass
+ last_atomic_number = atomic_number # END passes last_atomic_number to output()
+}
+
+END { # the final element has no following Atomic Number line to trigger its output
+    if (show_no_std_atomic_mass) {
+        if (! std_atomic_mass_str)
+            print names[last_atomic_number]
+    }
+    else
+        output(last_atomic_number)
+} ' "$@" # quoted so that each file operand reaches awk as one unmodified word