From d71166e20434ba94c39f461db49aa44972cdece5 Mon Sep 17 00:00:00 2001 From: EsGeh Date: Tue, 6 Aug 2019 19:01:27 +0200 Subject: [PATCH] improved directory structure, improved README --- .gitignore | 13 ++- README.md | 88 ++++++++++--------- corpus/exampleTEI.xml | 2 +- doc/html/eoa-tei.html | 1 + examples/{ => obsolete}/minimalTEI.xml | 0 examples/{ => tei}/exampleTEI.xml | 0 .../intermediate/s0/odd_compiled/p5subset.xml | 1 + schema/odd/eoa-tei.odd | 2 +- schema/rnc/eoa-tei.rnc | 1 + schema/s0/odd_compiled/p5subset.xml | 1 - schema/schematron/eoa-tei-schematron.xml | 1 + schema/schematron/eoa-tei-schematron.xsl | 1 + scripts/clean_output.sh | 16 ++-- scripts/updateOutput.sh | 50 ++++++----- scripts/validate_xml.sh | 2 +- scripts/validate_xml_by_schematron.sh | 2 +- 16 files changed, 98 insertions(+), 83 deletions(-) create mode 120000 doc/html/eoa-tei.html rename examples/{ => obsolete}/minimalTEI.xml (100%) rename examples/{ => tei}/exampleTEI.xml (100%) create mode 120000 schema/intermediate/s0/odd_compiled/p5subset.xml create mode 120000 schema/rnc/eoa-tei.rnc delete mode 120000 schema/s0/odd_compiled/p5subset.xml create mode 120000 schema/schematron/eoa-tei-schematron.xml create mode 120000 schema/schematron/eoa-tei-schematron.xsl diff --git a/.gitignore b/.gitignore index 80404f7..a56d3f5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,11 @@ -# these are created from the odd: -/schema/s0/ -/schema/s1/ -/schema/s2/ -/doc/generated -/schema/legacy/generated +# autogenerated files: +/schema/intermediate/s0/generated/ +/schema/intermediate/s1/ +/schema/intermediate/s2/ +/schema/legacy/generated/ /dependencies/ -# these files are created/removed by ./scripts/{un,}install.sh +# dependencies: /dependencies/ items/ diff --git a/README.md b/README.md index 75598a2..527d75c 100644 --- a/README.md +++ b/README.md @@ -1,81 +1,89 @@ # EOA publication model + This repository contains the document model for EOA publications. 
The model is maintained as a TEI ODD file which contains both the definition of the model as well as its documentation. This repository also contains an example file that contains all the phenomena that can occur in an EOA publication. +# Initialising the Repository -# Installation +After cloning the repository, make sure to run this script first: $ ./scripts/init.sh +This will + +- locally download dependencies +- create autogenerated resources (e.g. ODD, schemata) -# Validity checking +# Cleaning up the Repository -check your xml file against one of the schemas in the `schema` directory: +In order to purge all local dependencies and resources, run -- to check if your file is valid tei at all, use one of these schemas: + $ ./scripts/exit.sh - - `./schema/tei/tei_all.rnc` - - `./schema/tei/tei_all.rng` +# Example Publication -- to check if your file is compatible with EOA, in other word only uses tei features supported by EOA, use this schema: +For a short example of a valid EOA publication in TEI syntax, see - - `./schema/legacy/eoa-tei-strict.rnc` (hand made file) - - (see "EXPERIMENTAL") +- `./examples/tei/` +This example file serves the following purposes: -## EXPERIMENTAL +- give a first impression of the format +- exhaustively document all features available in EOA documents -- `./schema/generated/rnc/eoa-tei.rnc` : this is the file generated from the `./schema/odd/eoa-tei.odd`. Since the `.odd` file is still experimental and possibly incorrect the autogenerated schema is too! +There is also a version of the same publication in old EOATeX[^eoatex] format here: -## TODO +- `./examples/eoatex/` - - TODO: replace the "experimental" odd file by a stricter and safer one +The TEI version is the more recent format and is recommended for new EOA publications. -Eventually the autogenerated schema is going to replace the hand made one. +[^eoatex]: What is EOATeX? 
+ The TEI example file itself is modeled after the original EOA document model, + which was designed in LaTeX, the set of macros eventually being called EOATeX. -# How to recreate Schemata +# Checking Validity of a Publication -## Dependencies +In order to prove that some XML document is a valid TEI publication, consider the schemata in -- pandoc(optional) : only needed if you want odd statistics as HTML. +- `./schema/` -## Scripts +Because EOA has some more complex restrictions, both a schema (typically in RelaxNG syntax) and a schematron stylesheet need to be applied to be completely sure. +There are two helper scripts that automatise this task: -If you changed the odd file, and want to recreate the schemas, html doc, etc: + $ `./scripts/validate_xml.sh` + $ `./scripts/validate_xml_by_schematron.sh` - $ ./scripts/updateOutput.sh +For usage details append the `--help` option. -this (re-)creates files in `./schema/generated` and `./doc/generated` +# HTML Documentation -alternative ways to create schemata from odd: +The documentation of the EOA format can be found in -From the ODD file, documentation and schema files can be created. Several possibilities exist, the most straightforward being -[Roma](http://www.tei-c.org/Roma), the TEI schema generator or oxygen, the XML editor. +- `./doc/html/` -# Verify Schemata +Use your favourite web browser to read it. -To check if all files in `./corpus` are valid in respect to the schemata in this repository, issue: +# Abandoned +A further component is a stylesheet for +[ConTeXt](https://en.wikipedia.org/wiki/ConTeXt), which processes the +XML file to a PDF. This is currently incomplete and not maintained. 
- $ ./scripts/validate_xml.sh +# Information for Developers -# Example Document +The final ODD is the result of a process that involves multiple steps and depends on the following input files: -An example document can be found in +- the TEI example publication +- a schema in RelaxNG Compact syntax -- `./examples/` +After changing one of these resources, you'll have to update all files that depend on them: -The document exists in two formats: TEI, and EOATeX. -The TEI version is the more recent format and is recommended for new EOA publications. +## Updating autogenerated Files -# What is EOATeX? + $ ./scripts/clean_output.sh + $ ./scripts/updateOutput.sh -The TEI file itself is modeled after the original EOA document model, -which was designed in LaTeX, the set of macros eventually being called -EOATeX. A sample project and can be also found in this repository. +### Dependencies -# Abandoned -A further component is a stylesheet for -[ConTeXt](https://en.wikipedia.org/wiki/ConTeXt), which processes the -XML file to a PDF. This is currently incomplete and not maintained. +- pandoc(optional) : only needed if you want ODD statistics as HTML. 
diff --git a/corpus/exampleTEI.xml b/corpus/exampleTEI.xml index 537e11a..14e58de 120000 --- a/corpus/exampleTEI.xml +++ b/corpus/exampleTEI.xml @@ -1 +1 @@ -../examples/exampleTEI.xml \ No newline at end of file +../examples/tei/exampleTEI.xml \ No newline at end of file diff --git a/doc/html/eoa-tei.html b/doc/html/eoa-tei.html new file mode 120000 index 0000000..bb890d0 --- /dev/null +++ b/doc/html/eoa-tei.html @@ -0,0 +1 @@ +../../schema/intermediate/s2/generated/html/1_simple.html \ No newline at end of file diff --git a/examples/minimalTEI.xml b/examples/obsolete/minimalTEI.xml similarity index 100% rename from examples/minimalTEI.xml rename to examples/obsolete/minimalTEI.xml diff --git a/examples/exampleTEI.xml b/examples/tei/exampleTEI.xml similarity index 100% rename from examples/exampleTEI.xml rename to examples/tei/exampleTEI.xml diff --git a/schema/intermediate/s0/odd_compiled/p5subset.xml b/schema/intermediate/s0/odd_compiled/p5subset.xml new file mode 120000 index 0000000..62cd377 --- /dev/null +++ b/schema/intermediate/s0/odd_compiled/p5subset.xml @@ -0,0 +1 @@ +../../../tei/odd/p5subset.xml \ No newline at end of file diff --git a/schema/odd/eoa-tei.odd b/schema/odd/eoa-tei.odd index 3bceabf..c89875b 120000 --- a/schema/odd/eoa-tei.odd +++ b/schema/odd/eoa-tei.odd @@ -1 +1 @@ -../legacy/eoa-tei.odd \ No newline at end of file +../intermediate/s2/odd/1_simple.odd \ No newline at end of file diff --git a/schema/rnc/eoa-tei.rnc b/schema/rnc/eoa-tei.rnc new file mode 120000 index 0000000..e5159b8 --- /dev/null +++ b/schema/rnc/eoa-tei.rnc @@ -0,0 +1 @@ +../intermediate/s2/generated/rnc/1_simple.rnc \ No newline at end of file diff --git a/schema/s0/odd_compiled/p5subset.xml b/schema/s0/odd_compiled/p5subset.xml deleted file mode 120000 index ad07aeb..0000000 --- a/schema/s0/odd_compiled/p5subset.xml +++ /dev/null @@ -1 +0,0 @@ -../../tei/odd/p5subset.xml \ No newline at end of file diff --git a/schema/schematron/eoa-tei-schematron.xml 
b/schema/schematron/eoa-tei-schematron.xml new file mode 120000 index 0000000..fcc25f2 --- /dev/null +++ b/schema/schematron/eoa-tei-schematron.xml @@ -0,0 +1 @@ +../intermediate/s2/generated/schematron/1_simple.xml \ No newline at end of file diff --git a/schema/schematron/eoa-tei-schematron.xsl b/schema/schematron/eoa-tei-schematron.xsl new file mode 120000 index 0000000..4cab0be --- /dev/null +++ b/schema/schematron/eoa-tei-schematron.xsl @@ -0,0 +1 @@ +../intermediate/s2/generated/schematron/1_simple.xsl \ No newline at end of file diff --git a/scripts/clean_output.sh b/scripts/clean_output.sh index 2c3c387..533e5e7 100755 --- a/scripts/clean_output.sh +++ b/scripts/clean_output.sh @@ -3,15 +3,19 @@ BASE_DIR=$(dirname $0)/.. SCRIPT_DIR=$(dirname $0) +INTERMEDIATE_DIR="$BASE_DIR/schema/intermediate" + ####################################### # functions ####################################### function print_help { - echo "clean schemas" + echo "clean autogenerated intermediate files in '$INTERMEDIATE_DIR' (schemata, docu)" echo echo "usage: $0 [STAGE]" echo + echo "(if no STAGE is given, cleans all stages)" + echo echo "OPTIONS:" echo " -h | --help: print this help" echo @@ -42,21 +46,19 @@ if [[ "$#" -gt "1" ]]; then exit 1 fi -rm -rfv $BASE_DIR/schema/generated +# rm -rfv $INTERMEDIATE_DIR/generated if [[ "$1" == "" ]] || [[ "$1" == "0" ]]; then echo "cleaning s0:" - rm -rfv $BASE_DIR/schema/s0/generated/odd_cleaned - rm -rfv $BASE_DIR/schema/s0/generated/statistics - rm -rfv $BASE_DIR/schema/s0/generated/statistics_html + rm -rfv $INTERMEDIATE_DIR/s0/generated fi if [ "$1" == "" ] || [ "$1" == "0" ] || [ "$1" == "1" ]; then echo "cleaning s1:" - rm -rfv $BASE_DIR/schema/s1 + rm -rfv $INTERMEDIATE_DIR/s1 fi if [[ "$1" == "" ]] || [[ "$1" == "0" ]] || [[ "$1" == "1" ]] || [[ "$1" == "2" ]]; then echo "cleaning s2:" - rm -rfv $BASE_DIR/schema/s2 + rm -rfv $INTERMEDIATE_DIR/s2 fi diff --git a/scripts/updateOutput.sh b/scripts/updateOutput.sh index aa683da..ffbd7b6 
100755 --- a/scripts/updateOutput.sh +++ b/scripts/updateOutput.sh @@ -3,6 +3,8 @@ BASE_DIR=$(dirname $0)/.. SCRIPT_DIR=$(dirname $0) +INTERMEDIATE_DIR="$BASE_DIR/schema/intermediate" + ########################################## # stage 0: p5subset.xml: @@ -12,11 +14,11 @@ SCRIPT_DIR=$(dirname $0) function stage0 () { - output_dir="$BASE_DIR/schema/s0" + output_dir="$INTERMEDIATE_DIR/s0" echo "clean_odd.xsl..." $SCRIPT_DIR/local_stylesheet.sh \ - --input-dir "$output_dir/generated/odd_compiled" \ + --input-dir "$output_dir/odd_compiled" \ --output-dir "$output_dir/generated/odd_cleaned" \ --suffix '.xml' \ --cache \ @@ -60,21 +62,21 @@ echo "##########################################" echo "create ODD from corpus..." $SCRIPT_DIR/example2odd.sh \ --input-dir "$BASE_DIR/corpus" \ - --output-dir "$BASE_DIR/schema/s1/odd" \ + --output-dir "$INTERMEDIATE_DIR/s1/odd" \ --combinations 'all' \ --suffix '.odd' $SCRIPT_DIR/apply_odd.sh \ - --input-dir "$BASE_DIR/schema/s0/odd_compiled/p5subset.xml" \ - --output-dir "$BASE_DIR/schema/s1" + --input-dir "$INTERMEDIATE_DIR/s0/odd_compiled/p5subset.xml" \ + --output-dir "$INTERMEDIATE_DIR/s1" echo "create RELAX-NG Compact..." $SCRIPT_DIR/tei_stylesheet.sh \ --cmd "teitornc" \ - --input-dir "$BASE_DIR/schema/s1/odd" \ - --output-dir "$BASE_DIR/schema/s1/generated/rnc" \ + --input-dir "$INTERMEDIATE_DIR/s1/odd" \ + --output-dir "$INTERMEDIATE_DIR/s1/generated/rnc" \ --suffix '.rnc' \ - --option "--localsource='$BASE_DIR/schema/s0/generated/odd_compiled/p5subset.xml'" \ + --option "--localsource='$INTERMEDIATE_DIR/s0/odd_compiled/p5subset.xml'" \ "$@" ########################################## @@ -83,52 +85,52 @@ echo "##########################################" echo "# stage 2: autogenerate ODD from RELAX-NG schema" echo "##########################################" -mkdir -p $BASE_DIR/schema/s2 -mkdir -p $BASE_DIR/schema/s2/logs +mkdir -p $INTERMEDIATE_DIR/s2 +mkdir -p $INTERMEDIATE_DIR/s2/logs echo "prepare: rnc -> analyis..." 
$SCRIPT_DIR/rnc_to_analysis.sh \ --verbosity 3 \ - >$BASE_DIR/schema/s2/logs/rnc_to_analysis.log \ + >$INTERMEDIATE_DIR/s2/logs/rnc_to_analysis.log \ 2>&1 echo "prepare: analyis -> odd..." $SCRIPT_DIR/rnc_analysis_to_odd.sh \ --verbosity 3 \ - --output-dir "$BASE_DIR/schema/s2/odd" \ - >$BASE_DIR/schema/s2/logs/rnc_analysis_to_odd.log \ + --output-dir "$INTERMEDIATE_DIR/s2/odd" \ + >$INTERMEDIATE_DIR/s2/logs/rnc_analysis_to_odd.log \ 2>&1 echo "apply odd" $SCRIPT_DIR/apply_odd.sh \ - --input-dir "$BASE_DIR/schema/s1/generated/odd_compiled/autogen_all.xml" \ - --output-dir "$BASE_DIR/schema/s2" + --input-dir "$INTERMEDIATE_DIR/s1/generated/odd_compiled/autogen_all.xml" \ + --output-dir "$INTERMEDIATE_DIR/s2" echo "create RELAX-NG Compact..." $SCRIPT_DIR/tei_stylesheet.sh \ --cmd "teitornc" \ - --input-dir "$BASE_DIR/schema/s1/odd" \ - --output-dir "$BASE_DIR/schema/s1/generated/rnc" \ + --input-dir "$INTERMEDIATE_DIR/s2/odd" \ + --output-dir "$INTERMEDIATE_DIR/s2/generated/rnc" \ --suffix '.rnc' \ - --option "--localsource='$BASE_DIR/schema/s0/generated/odd_compiled/p5subset.xml'" \ + --option "--localsource='$INTERMEDIATE_DIR/s0/odd_compiled/p5subset.xml'" \ "$@" echo "create schematron (for more complex rules)..." 
$SCRIPT_DIR/tei_stylesheet.sh \ - --input-dir "$BASE_DIR/schema/s2/odd" \ - --output-dir "$BASE_DIR/schema/s2/generated/schematron" \ + --input-dir "$INTERMEDIATE_DIR/s2/odd" \ + --output-dir "$INTERMEDIATE_DIR/s2/generated/schematron" \ --cmd "teitoschematron" \ --suffix '.xml' \ - >$BASE_DIR/schema/s2/logs/teitoschematron.log \ + >$INTERMEDIATE_DIR/s2/logs/teitoschematron.log \ 2>&1 echo "schematron -> xslt 2" $SCRIPT_DIR/local_stylesheet.sh \ - --input-dir "$BASE_DIR/schema/s2/generated/schematron" \ - --output-dir "$BASE_DIR/schema/s2/generated/schematron" \ + --input-dir "$INTERMEDIATE_DIR/s2/generated/schematron" \ + --output-dir "$INTERMEDIATE_DIR/s2/generated/schematron" \ --suffix '.xsl' \ "schematron/iso_svrl_for_xslt2.xsl" \ - >$BASE_DIR/schema/s2/logs/schematron_to_xsl.log \ + >$INTERMEDIATE_DIR/s2/logs/schematron_to_xsl.log \ 2>&1 echo "done" diff --git a/scripts/validate_xml.sh b/scripts/validate_xml.sh index 15d2139..b7b37f5 100755 --- a/scripts/validate_xml.sh +++ b/scripts/validate_xml.sh @@ -4,7 +4,7 @@ BASE_DIR=$(dirname $0)/.. DEP_DIR=$BASE_DIR/dependencies input_dir="$BASE_DIR/corpus" -schema_dir="$BASE_DIR/schema/s2/generated/rnc" +schema_dir="$BASE_DIR/schema/rnc" input_pattern='*.xml' diff --git a/scripts/validate_xml_by_schematron.sh b/scripts/validate_xml_by_schematron.sh index a4c9ebc..ea3b615 100755 --- a/scripts/validate_xml_by_schematron.sh +++ b/scripts/validate_xml_by_schematron.sh @@ -6,7 +6,7 @@ DEP_DIR=$BASE_DIR/dependencies input_dir="$BASE_DIR/corpus" output_dir="-" -schema_dir="$BASE_DIR/schema/s2/generated/schematron" +schema_dir="$BASE_DIR/schema/schematron" input_pattern='*.xml'