Skip to content
Permalink
93de43745e
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 290 lines (256 sloc) 7.95 KB
#!/bin/bash
# Directory layout, derived from the location of this script.
# "$0" is quoted so that a script path containing whitespace reaches
# dirname as one argument instead of being word-split.
BASE_DIR="$(dirname "$0")/.."
SCRIPT_DIR="$(dirname "$0")"
DEP_DIR="$BASE_DIR/dependencies"
INTERMEDIATE_DIR="$BASE_DIR/schema/intermediate"
# Resource types requested from im_output for stage 0, 1 and 2 respectively.
INTERMEDIATE_OUTPUT_0=( "cleaned" "statistics" )
INTERMEDIATE_OUTPUT_1=( "cleaned" "statistics" )
INTERMEDIATE_OUTPUT_2=( "cleaned" "statistics" "rnc" "rng" "schema" "schematron" )
# break script on first error
# (both options are currently commented out, i.e. not in effect)
# set -e
# set -o pipefail
# Make sure a 'realpath' command can be called later in this script.
# If only 'grealpath' exists, expose it under the name 'realpath'.
if ! command -v realpath > /dev/null 2>&1; then
  if command -v grealpath > /dev/null 2>&1; then
    # A function is used instead of 'alias realpath=grealpath': bash does
    # not expand aliases in non-interactive shells unless
    # 'shopt -s expand_aliases' is set, so the alias would have had no
    # effect and every later 'realpath' call would still fail.
    realpath() { grealpath "$@"; }
  else
    echo "realpath not found (also 'grealpath' is not available)." >&2
    echo "On mac, please install 'coreutils', e.g. using homebrew" >&2
    exit 1
  fi
fi
##########################################
# stage 0: p5subset.xml:
##########################################
# ODD -> * (schemata)
#######################################
# Helper for im_output: succeed if the first argument is equal to one of
# the remaining arguments.
# Arguments: $1 - value to look for; $2.. - list to search
# Returns:   0 if found, 1 otherwise
#######################################
_im_output_wants() {
  local wanted="$1"
  shift
  local candidate
  for candidate in "$@"; do
    if [[ "$candidate" == "$wanted" ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Helper for im_output: run one of the TEI Stylesheets converters from
# "$DEP_DIR/Stylesheets/bin" over "$dir/odd" via utils/for_each_input.sh.
# Globals:   SCRIPT_DIR (read), DEP_DIR (read)
# Arguments: $1 - stage directory
#            $2 - local source path, may be empty
#            $3 - converter name (also used as the log file base name)
#            $4 - sub-directory of "$dir/generated" to write to
#            $5 - suffix handed to for_each_input.sh
#######################################
_im_output_convert_odd() {
  local dir="$1"
  local local_src="$2"
  local tool="$3"
  local out_subdir="$4"
  local suffix="$5"

  # The single quotes are part of the value on purpose (kept from the
  # original). NOTE(review): presumably for_each_input.sh re-parses the
  # command line it is given - confirm against that script before changing.
  local local_src_opt=""
  if [[ "$local_src" != "" ]]; then
    local_src_opt="--localsource='$local_src'"
  fi

  # '$INPUT' / '$OUTPUT' are passed literally (not expanded here).
  # "$local_src_opt" is passed even when empty, exactly as before.
  # NOTE(review): 'set -o pipefail' is not enabled in this script, so the
  # status checked by '|| exit 1' is that of sed, not of the converter.
  # shellcheck disable=SC2016
  "$SCRIPT_DIR/utils/for_each_input.sh" \
    --input-dir "$dir/odd" \
    --output-dir "$dir/generated/$out_subdir" \
    --suffix "$suffix" \
    --log-file "$dir/logs/$tool.log" \
    --print-cmd \
    -- \
    "$DEP_DIR/Stylesheets/bin/$tool" \
    "$local_src_opt" \
    '$INPUT' '$OUTPUT' \
    | sed 's/^/ /' \
    || exit 1
}

#######################################
# Produce the requested intermediate outputs for one stage directory.
#
# Recognised resource names: cleaned, statistics, rnc, rng, schema,
# schematron. Names not in this list are ignored. The steps always run in
# that fixed order, regardless of the order of the arguments.
#
# Globals:   SCRIPT_DIR (read), BASE_DIR (read), DEP_DIR (read)
# Arguments: $1   - stage directory (inputs are read from below it,
#                   results go to "$1/generated", logs to "$1/logs")
#            $2   - local source file passed as --localsource to the
#                   converters; empty string for none
#            $3.. - resource names to generate
# Outputs:   progress messages and the (sed-prefixed) output of the
#            invoked tools on stdout
#######################################
im_output() {
  echo "intermediate output...:"
  local dir="$1"
  shift
  local local_src="$1"
  shift
  local dump_these_resources=( "$@" )
  echo "dir: $dir"
  echo "local_src: $local_src"

  # NOTE(review): in every pipeline below, '|| exit 1' sees the exit status
  # of sed because 'set -o pipefail' is not enabled in this script.

  if _im_output_wants "cleaned" "${dump_these_resources[@]}"; then
    echo "ODD -> illiterate ODD"
    # shellcheck disable=SC2016
    "$SCRIPT_DIR/utils/for_each_input.sh" \
      --input-dir "$dir/odd_compiled" \
      --output-dir "$dir/generated/odd_cleaned" \
      --print-cmd \
      --log-file "$dir/logs/clean_odd.log" \
      -- \
      "$SCRIPT_DIR/saxon.sh" \
      "$BASE_DIR/stylesheets/clean_odd.xsl" '-s:$INPUT' '-o:$OUTPUT' \
      | sed 's/^/ /' \
      || exit 1
  fi

  if _im_output_wants "statistics" "${dump_these_resources[@]}"; then
    echo "illiterate ODD -> statistics"
    # Reads the output directory of the 'cleaned' step above.
    # shellcheck disable=SC2016
    "$SCRIPT_DIR/utils/for_each_input.sh" \
      --input-dir "$dir/generated/odd_cleaned" \
      --output-dir "$dir/generated/statistics" \
      --suffix '.md' \
      --print-cmd \
      --log-file "$dir/logs/statistics.log" \
      -- \
      "$SCRIPT_DIR/saxon.sh" \
      "$BASE_DIR/stylesheets/statistics.xsl" '-s:$INPUT' '-o:$OUTPUT' \
      | sed 's/^/ /' \
      || exit 1
    # echo "htmlstatistics.xsl..."
    # The HTML rendering is optional: without pandoc only a warning is shown.
    if ! command -v pandoc &> /dev/null; then
      echo "WARNING: pandoc not found. No HTML statistics generated."
    else
      # shellcheck disable=SC2016
      "$SCRIPT_DIR/utils/for_each_input.sh" \
        --input-dir "$dir/generated/statistics" \
        --output-dir "$dir/generated/statistics_html" \
        --suffix '.html' \
        --print-cmd \
        --log-file "$dir/logs/statistics_html.log" \
        -- \
        "pandoc" -o '$OUTPUT' '$INPUT' \
        | sed 's/^/ /' \
        || exit 1
    fi
  fi

  if _im_output_wants "rnc" "${dump_these_resources[@]}"; then
    echo "ODD -> RELAX-NG Compact..."
    _im_output_convert_odd "$dir" "$local_src" "teitornc" "rnc" ".rnc"
  fi

  if _im_output_wants "rng" "${dump_these_resources[@]}"; then
    echo "ODD -> RELAX-NG..."
    _im_output_convert_odd "$dir" "$local_src" "teitorelaxng" "rng" ".rng"
  fi

  if _im_output_wants "schema" "${dump_these_resources[@]}"; then
    echo "ODD -> XML Schema..."
    _im_output_convert_odd "$dir" "$local_src" "teitoxsd" "xsd" ".xsd"
  fi

  if _im_output_wants "schematron" "${dump_these_resources[@]}"; then
    echo "ODD -> Schematron"
    _im_output_convert_odd "$dir" "$local_src" "teitoschematron" "schematron" ".xml"
    echo "Schematron -> xslt 2"
    # Input and output directory are the same; '--filter' restricts the
    # run to the '*.xml' files.
    # shellcheck disable=SC2016
    "$SCRIPT_DIR/utils/for_each_input.sh" \
      --input-dir "$dir/generated/schematron" \
      --output-dir "$dir/generated/schematron" \
      --filter '*.xml' \
      --suffix '.xsl' \
      --log-file "$dir/logs/schematrontoxslt2.log" \
      --print-cmd \
      -- \
      "$SCRIPT_DIR/saxon.sh" \
      "$BASE_DIR/stylesheets/schematron/iso_svrl_for_xslt2.xsl" \
      '-s:$INPUT' '-o:$OUTPUT' \
      | sed 's/^/ /' \
      || exit 1
  fi
}
#######################################
# Stage 0: create the intermediate output for the TEI guidelines.
# Validates INTERMEDIATE_OUTPUT_0 and then delegates to im_output for the
# directory "$INTERMEDIATE_DIR/s0" with an empty local source.
# Globals:   INTERMEDIATE_DIR (read), INTERMEDIATE_OUTPUT_0 (read)
# Arguments: none
# Outputs:   banner on stdout; error text on stderr
# Exits:     with status 1 if INTERMEDIATE_OUTPUT_0 contains anything other
#            than 'cleaned' or 'statistics'
#######################################
stage0() {
  echo "##########################################"
  echo "# stage 0: the TEI guidelines"
  echo "##########################################"
  # Both variables are local so they no longer leak into the global scope.
  local output_dir="$INTERMEDIATE_DIR/s0"
  local x
  for x in "${INTERMEDIATE_OUTPUT_0[@]}"; do
    case "$x" in
      cleaned | statistics)
        ;;
      *)
        {
          echo "ERROR: in stage 0: cannot create intermediate output"
          echo "of type '$x', because we don't have the uncompiled ODD"
          echo "only possible values: cleaned|statistics"
        } >&2
        exit 1
        ;;
    esac
  done
  im_output "$output_dir" "" "${INTERMEDIATE_OUTPUT_0[@]}"
}
# Run stage 0 and stop the whole script if it reports failure.
stage0 || exit 1
##########################################
echo "##########################################"
echo "# stage 1: generate ODD from corpus"
echo "##########################################"
mkdir -pv "$INTERMEDIATE_DIR/s1/logs"
# Resolve both paths once and keep them quoted from here on: the corpus
# path contains '?' and '*', which must not be subject to word splitting or
# pathname expansion.
# NOTE(review): '?select=*.xml' presumably is a collection selector
# interpreted by the stylesheet/Saxon rather than a file name - confirm.
corpus_list="$(realpath "$BASE_DIR/schema/resource?select=*.xml")"
default_source="$(realpath "$BASE_DIR/schema/tei/odd/p5subset.xml")"
echo "create ODD from corpus from example files in '${corpus_list}'"
# '$OUTPUT' is passed literally (not expanded here).
# NOTE(review): 'set -o pipefail' is not enabled in this script, so the
# '|| exit 1' after each '| sed' checks the status of sed only.
# shellcheck disable=SC2016
"$SCRIPT_DIR/utils/run_with_cache.sh" \
  --output-file "$INTERMEDIATE_DIR/s1/odd/tmp.odd" \
  --cache 1 \
  --log-file "$INTERMEDIATE_DIR/s1/logs/oddbyexample.log" \
  --print-cmd \
  -- \
  "$SCRIPT_DIR/saxon.sh" \
  "$DEP_DIR/Stylesheets/tools/oddbyexample.xsl" \
  '-o:$OUTPUT' \
  '-it:main' \
  "corpusList=${corpus_list}" \
  "defaultSource=${default_source}" \
  debug=true \
  verbose=true \
  | sed 's/^/ /' \
  || exit 1
echo "compile odd..."
# Compiles every ODD in s1/odd against the stage-0 p5subset.xml.
# shellcheck disable=SC2016
"$SCRIPT_DIR/utils/for_each_input.sh" \
  --input-dir "$INTERMEDIATE_DIR/s1/odd" \
  --output-dir "$INTERMEDIATE_DIR/s1/odd_compiled" \
  --suffix '.xml' \
  --log-file "$INTERMEDIATE_DIR/s1/logs/teitoodd.log" \
  --print-cmd \
  -- \
  "$DEP_DIR/Stylesheets/bin/teitoodd" \
  --localsource="$INTERMEDIATE_DIR/s0/odd_compiled/p5subset.xml" \
  '$INPUT' '$OUTPUT' \
  | sed 's/^/ /' \
  || exit 1
im_output "$INTERMEDIATE_DIR/s1" "$INTERMEDIATE_DIR/s0/odd_compiled/p5subset.xml" "${INTERMEDIATE_OUTPUT_1[@]}"
##########################################
echo "##########################################"
echo "# stage 2: autogenerate ODD from RELAX-NG schema"
echo "##########################################"
echo "prepare: rnc -> analyis..."
# All expansions below are quoted so that a base path containing whitespace
# does not break mkdir, the script invocations or the log redirections.
mkdir -p "$INTERMEDIATE_DIR/s2/logs"
# stdout and stderr of this step go to the log file only.
"$SCRIPT_DIR/rnc_to_analysis.sh" \
  --input "$BASE_DIR/schema/resource/eoa-tei-strict.rnc" \
  --output-dir "$INTERMEDIATE_DIR/s2/prepare/rnc_analysis" \
  --verbosity 3 \
  > "$INTERMEDIATE_DIR/s2/logs/rnc_to_analysis.log" \
  2>&1 \
  || exit 1
echo "analyis -> odd..."
# Consumes the analysis directory written by the previous step.
"$SCRIPT_DIR/rnc_analysis_to_odd.sh" \
  --verbosity 3 \
  --input-dir "$INTERMEDIATE_DIR/s2/prepare/rnc_analysis" \
  --output-dir "$INTERMEDIATE_DIR/s2/odd" \
  --modules 'core,tei,header,namesdates,figures,transcr,linking,textstructure' \
  > "$INTERMEDIATE_DIR/s2/logs/rnc_analysis_to_odd.log" \
  2>&1 \
  || exit 1
echo "compile odd..."
# Compiles every ODD in s2/odd against the compiled stage-1 ODD.
# '$INPUT' / '$OUTPUT' are passed literally (not expanded here).
# NOTE(review): 'set -o pipefail' is not enabled in this script, so the
# '|| exit 1' after '| sed' checks the status of sed only.
# shellcheck disable=SC2016
"$SCRIPT_DIR/utils/for_each_input.sh" \
  --input-dir "$INTERMEDIATE_DIR/s2/odd" \
  --output-dir "$INTERMEDIATE_DIR/s2/odd_compiled" \
  --suffix '.xml' \
  --log-file "$INTERMEDIATE_DIR/s2/logs/teitoodd.log" \
  --print-cmd \
  -- \
  "$DEP_DIR/Stylesheets/bin/teitoodd" \
  --localsource="$INTERMEDIATE_DIR/s1/odd_compiled/tmp.xml" \
  '$INPUT' '$OUTPUT' \
  | sed 's/^/ /' \
  || exit 1
im_output "$INTERMEDIATE_DIR/s2" "$INTERMEDIATE_DIR/s1/odd_compiled/tmp.xml" "${INTERMEDIATE_OUTPUT_2[@]}"
echo "done"