From 40befea0363918488b9bedbaf693e9c7b3b8e44e Mon Sep 17 00:00:00 2001 From: Miklos Vajna Date: Sat, 21 Jun 2014 10:51:49 +0200 Subject: writerfilter: convert modelpreprocess to Python Change-Id: I8c63d8d1bcf4247491e4be22d1e37e1eefe1e7d1 --- writerfilter/CustomTarget_source.mk | 19 +- writerfilter/source/ooxml/modelpreprocess.py | 183 ++++++++++++++++++ writerfilter/source/ooxml/modelpreprocess.xsl | 213 --------------------- .../source/resourcemodel/namespace_preprocess.pl | 68 ------- 4 files changed, 187 insertions(+), 296 deletions(-) create mode 100644 writerfilter/source/ooxml/modelpreprocess.py delete mode 100644 writerfilter/source/ooxml/modelpreprocess.xsl delete mode 100644 writerfilter/source/resourcemodel/namespace_preprocess.pl diff --git a/writerfilter/CustomTarget_source.mk b/writerfilter/CustomTarget_source.mk index 4452c54a3c6d..55ba50ae6d32 100644 --- a/writerfilter/CustomTarget_source.mk +++ b/writerfilter/CustomTarget_source.mk @@ -67,18 +67,16 @@ writerfilter_GEN_ooxml_GperfFastToken_hxx=$(writerfilter_WORK)/gperffasttoken.hx writerfilter_GEN_ooxml_Model_processed=$(writerfilter_WORK)/model_preprocessed.xml writerfilter_GEN_ooxml_NamespaceIds_hxx=$(writerfilter_WORK)/ooxml/OOXMLnamespaceids.hxx writerfilter_GEN_ooxml_Namespacesmap_xsl=$(writerfilter_WORK)/namespacesmap.xsl -writerfilter_GEN_ooxml_Preprocess_xsl=$(writerfilter_WORK)/modelpreprocess.xsl writerfilter_GEN_ooxml_QNameToStr_cxx=$(writerfilter_WORK)/ooxml/qnametostr.cxx writerfilter_GEN_ooxml_ResourceIds_hxx=$(writerfilter_WORK)/ooxml/resourceids.hxx writerfilter_GEN_ooxml_Token_xml=$(writerfilter_WORK)/token.xml -writerfilter_SRC_model_NamespacePreprocess=$(writerfilter_SRC)/resourcemodel/namespace_preprocess.pl writerfilter_SRC_ooxml_FactoryTools_xsl=$(writerfilter_SRC)/ooxml/factorytools.xsl writerfilter_SRC_ooxml_FactoryValues_xsl=$(writerfilter_SRC)/ooxml/factory_values.xsl writerfilter_SRC_ooxml_FastTokens_py=$(writerfilter_SRC)/ooxml/fasttokens.py 
writerfilter_SRC_ooxml_GperfFastTokenHandler_py=$(writerfilter_SRC)/ooxml/gperffasttokenhandler.py writerfilter_SRC_ooxml_Model=$(writerfilter_SRC)/ooxml/model.xml writerfilter_SRC_ooxml_NamespaceIds_xsl=$(writerfilter_SRC)/ooxml/namespaceids.xsl -writerfilter_SRC_ooxml_Preprocess_xsl=$(writerfilter_SRC)/ooxml/modelpreprocess.xsl +writerfilter_SRC_ooxml_Preprocess_py=$(writerfilter_SRC)/ooxml/modelpreprocess.py writerfilter_SRC_ooxml_QNameToStr_xsl=$(writerfilter_SRC)/ooxml/qnametostr.xsl writerfilter_SRC_ooxml_ResourceIds_xsl=$(writerfilter_SRC)/ooxml/resourceids.xsl @@ -103,23 +101,14 @@ $(writerfilter_GEN_ooxml_GperfFastToken_hxx) : $(writerfilter_SRC_ooxml_GperfFas $(call gb_Helper_abbreviate_dirs, $(writerfilter_PYTHONCOMMAND) $(writerfilter_SRC_ooxml_GperfFastTokenHandler_py) $(writerfilter_GEN_ooxml_Token_xml)) \ | tr -d '\r' | $(GPERF) -c -E -G -I -LC++ -S1 -t > $@ -$(writerfilter_GEN_ooxml_Model_processed) : $(writerfilter_GEN_ooxml_Namespacesmap_xsl) $(writerfilter_GEN_ooxml_Preprocess_xsl) $(writerfilter_SRC_ooxml_Model) - $(call gb_Output_announce,$(subst $(WORKDIR)/,,$@),build,XSL,1) - $(call gb_Helper_abbreviate_dirs, $(writerfilter_XSLTCOMMAND) $(writerfilter_GEN_ooxml_Namespacesmap_xsl) $(writerfilter_SRC_ooxml_Model)) > $@ +$(writerfilter_GEN_ooxml_Model_processed) : $(writerfilter_SRC_ooxml_Preprocess_py) $(writerfilter_DEP_ooxml_Namespaces_txt) $(writerfilter_SRC_ooxml_Model) | $(writerfilter_WORK)/.dir + $(call gb_Output_announce,$(subst $(WORKDIR)/,,$@),build,PY ,1) + $(call gb_Helper_abbreviate_dirs, $(writerfilter_PYTHONCOMMAND) $(writerfilter_SRC_ooxml_Preprocess_py) $(writerfilter_DEP_ooxml_Namespaces_txt) $(writerfilter_SRC_ooxml_Model)) > $@ $(writerfilter_GEN_ooxml_NamespaceIds_hxx) : $(writerfilter_SRC_ooxml_NamespaceIds_xsl) $(writerfilter_GEN_ooxml_Model_processed) | $(writerfilter_WORK)/ooxml/.dir $(call gb_Output_announce,$(subst $(WORKDIR)/,,$@),build,XSL,1) $(call gb_Helper_abbreviate_dirs, $(writerfilter_XSLTCOMMAND) 
$(writerfilter_SRC_ooxml_NamespaceIds_xsl) $(writerfilter_GEN_ooxml_Model_processed)) > $@ -$(writerfilter_GEN_ooxml_Namespacesmap_xsl) : $(writerfilter_SRC_ooxml_Model) $(writerfilter_DEP_ooxml_Namespaces_txt) \ - $(writerfilter_SRC_model_NamespacePreprocess) | $(writerfilter_WORK)/.dir - $(call gb_Output_announce,$(subst $(WORKDIR)/,,$@),build,PRL,1) - $(PERL) $(writerfilter_SRC_model_NamespacePreprocess) $(writerfilter_DEP_ooxml_Namespaces_txt) > $@ - -$(writerfilter_GEN_ooxml_Preprocess_xsl) : $(writerfilter_SRC_ooxml_Preprocess_xsl) | $(writerfilter_WORK)/.dir - $(call gb_Output_announce,$(subst $(WORKDIR)/,,$@),build,CPY,1) - cp -f $(writerfilter_SRC_ooxml_Preprocess_xsl) $@ - $(writerfilter_GEN_ooxml_QNameToStr_cxx): $(writerfilter_SRC_ooxml_QNameToStr_xsl) $(writerfilter_SRC_ooxml_FactoryTools_xsl) $(writerfilter_GEN_ooxml_Model_processed) $(call gb_Output_announce,$(subst $(WORKDIR)/,,$@),build,XSL,1) $(call gb_Helper_abbreviate_dirs, $(writerfilter_XSLTCOMMAND) $(writerfilter_SRC_ooxml_QNameToStr_xsl) $(writerfilter_GEN_ooxml_Model_processed)) > $@ diff --git a/writerfilter/source/ooxml/modelpreprocess.py b/writerfilter/source/ooxml/modelpreprocess.py new file mode 100644 index 000000000000..022568f91ded --- /dev/null +++ b/writerfilter/source/ooxml/modelpreprocess.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +# +# This file is part of the LibreOffice project. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
#

from __future__ import print_function
from xml.dom import minidom
import sys

# URL prefix that preprocess() strips from a grammar's "ns" attribute to
# derive the "prefix" and "application" attributes.
_OOXML_BASE_URL = "http://schemas.openxmlformats.org/"

# URL -> ID, from oox
ooxUrlIds = {}
# Alias -> ID, from oox
ooxAliasIds = {}
# URL -> alias
namespaceAliases = {}


def prefixFromUrl(url):
    """Return the token prefix to use for the namespace URL *url*.

    A URL registered in ``namespaceAliases`` maps to its alias.  Any other
    URL starting with ``http://`` is turned into an identifier by removing
    ``http://`` and replacing ``/`` and ``.`` with ``_``.  Everything else
    yields the empty string.
    """
    if url in namespaceAliases:
        return namespaceAliases[url]
    if url.startswith("http://"):
        return url.replace('http://', '').replace('/', '_').replace('.', '_')
    return ""


def prefixForGrammar(namespace):
    """Return the prefix for the first <grammar> below *namespace*."""
    # prefixFromUrl() already consults namespaceAliases first, so no separate
    # alias lookup is needed here.
    return prefixFromUrl(nsForGrammar(namespace))


def nsForGrammar(namespace):
    """Return the "ns" attribute of the first <grammar> below *namespace*.

    Raises IndexError when *namespace* contains no <grammar> element.
    """
    return namespace.getElementsByTagName("grammar")[0].getAttribute("ns")


def parseNamespaceAliases(node, ret):
    """Copy the ``xmlns:*`` declarations found on *node* into *ret*.

    *ret* is a dict that is updated in place: the key is the attribute name
    without ``xmlns:``, the value is the attribute value.
    """
    for attrName, attrValue in node.attributes.items():
        if attrName.startswith("xmlns:"):
            ret[attrName.replace('xmlns:', '')] = attrValue


def parseNamespaces(fro):
    """Fill ``ooxUrlIds`` and ``ooxAliasIds`` from the file named *fro*.

    Every non-blank line must consist of three whitespace-separated fields:
    id, alias and URL.  Blank lines are skipped; any other field count raises
    ValueError.
    """
    # 'with' closes the file even when a malformed line raises.
    with open(fro) as stream:
        for rawLine in stream:
            # split() without arguments accepts tabs and repeated blanks.
            fields = rawLine.split()
            if not fields:
                continue
            if len(fields) != 3:
                raise ValueError("%s: expected 'id alias url', got %r" % (fro, rawLine))
            nsId, alias, url = fields
            ooxUrlIds[url] = nsId
            ooxAliasIds[alias] = nsId


def defaultNamespaceAliases():
    """Return a fresh prefix -> URL dict holding only the ``xml`` prefix."""
    return {
        'xml': 'http://www.w3.org/XML/1998/namespace',
    }


def _resolveToken(node, grammar, grammarprefix, grammarns, aliases):
    """Return ``(prefix, ns, localname)`` for an <element>/<attribute> node.

    *aliases* is the prefix -> URL mapping in effect for *node*.
    """
    name = node.getAttribute("name")
    prefix = ""
    ns = ""

    if ":" in name:
        # Explicitly prefixed name: resolve the prefix via the aliases in
        # scope; an unknown prefix leaves both prefix and ns empty.
        parts = name.split(':')
        nameprefix, localname = parts[0], parts[1]
        if nameprefix in aliases:
            ns = aliases[nameprefix]
            prefix = prefixFromUrl(ns)
        return prefix, ns, localname

    if node.localName == "attribute":
        if grammar.getAttribute("attributeFormDefault") == "qualified":
            prefix = grammarprefix
            # if parent node is resource, then we're outside the grammar
            # element: the prefix is still set, the namespace is not.
            if node.parentNode.localName != "resource":
                ns = grammarns
    else:
        prefix = grammarprefix
        ns = grammarns

    if node.getElementsByTagName("anyName"):
        localname = "FAST_TOKENS_END"
    else:
        localname = name
    return prefix, ns, localname


def _annotateNamespace(namespace):
    """Set the derived attributes on one <namespace> and its descendants."""
    # Start elements in this namespace.
    startElements = set(s.getAttribute("name") for s in namespace.getElementsByTagName("start"))

    grammar = namespace.getElementsByTagName("grammar")[0]
    grammarns = grammar.getAttribute("ns")

    namespace.setAttribute("namespacealias", namespaceAliases.get(grammarns, ""))
    if grammarns.startswith(_OOXML_BASE_URL):
        namespace.setAttribute("prefix", grammarns.replace(_OOXML_BASE_URL, '').replace('/', '_').replace('-', '_'))
    else:
        namespace.setAttribute("prefix", "")

    grammarprefix = prefixFromUrl(grammarns)

    grammarNamespaceAliases = defaultNamespaceAliases()
    parseNamespaceAliases(grammar, grammarNamespaceAliases)

    for node in namespace.getElementsByTagName("element") + namespace.getElementsByTagName("attribute"):
        if node.localName == "attribute" and not node.getAttribute("name"):
            continue

        if node.parentNode.localName == "resource":
            # Outside grammar element, don't inherit namespaces defined there.
            localNamespaceAliases = defaultNamespaceAliases()
        else:
            # Inside grammar element.
            localNamespaceAliases = grammarNamespaceAliases.copy()

        parseNamespaceAliases(node.parentNode, localNamespaceAliases)
        parseNamespaceAliases(node, localNamespaceAliases)

        prefix, ns, localname = _resolveToken(node, grammar, grammarprefix, grammarns, localNamespaceAliases)

        # set the attributes
        if prefix:
            node.setAttribute("enumname", prefix + ":" + localname)
        else:
            node.setAttribute("enumname", localname)
        if ns:
            node.setAttribute("qname", ns + ":" + localname)
        else:
            node.setAttribute("qname", localname)
        node.setAttribute("prefix", prefix)
        node.setAttribute("localname", localname)

    for node in namespace.getElementsByTagName("define") + namespace.getElementsByTagName("ref"):
        name = node.getAttribute("name")
        if name.startswith(("CT_", "EG_", "AG_")) or name in startElements:
            node.setAttribute("classfordefine", "1")
        else:
            node.setAttribute("classfordefine", "0")


def preprocess(model):
    """Annotate the parsed model document *model* in place.

    Sets "id" on every <namespace-alias> (recording its URL -> alias pair in
    ``namespaceAliases``), "namespacealias"/"prefix" on every <namespace>,
    "enumname"/"qname"/"prefix"/"localname" on the <element> and named
    <attribute> nodes, "classfordefine" on <define>/<ref> nodes and
    "application" on every <grammar>.

    Raises KeyError when a <namespace-alias> is known to ``ooxUrlIds`` neither
    by name nor, via ``ooxAliasIds``, by alias.
    """
    for aliasNode in model.getElementsByTagName("namespace-alias"):
        name = aliasNode.getAttribute("name")
        alias = aliasNode.getAttribute("alias")
        if name in ooxUrlIds:
            aliasNode.setAttribute("id", ooxUrlIds[name])
        else:
            aliasNode.setAttribute("id", ooxAliasIds[alias])
        namespaceAliases[name] = alias

    for namespace in model.getElementsByTagName("namespace"):
        _annotateNamespace(namespace)

    for grammar in model.getElementsByTagName("grammar"):
        ns = grammar.getAttribute("ns")
        if ns.startswith(_OOXML_BASE_URL):
            grammar.setAttribute("application", ns.replace(_OOXML_BASE_URL, '').split('/')[0])
        else:
            grammar.setAttribute("application", "")


def main(argv):
    """Preprocess the model named in *argv* and write it to stdout.

    *argv* is ``[program, namespaces file, model file]``.  Returns the process
    exit status.
    """
    if len(argv) < 3:
        print("usage: modelpreprocess.py <namespaces.txt> <model.xml>", file=sys.stderr)
        return 1

    parseNamespaces(argv[1])
    model = minidom.parse(argv[2])
    preprocess(model)
    model.writexml(sys.stdout)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

# vim:set shiftwidth=4 softtabstop=4 expandtab:
See the NOTICE file distributed -# with this work for additional information regarding copyright -# ownership. The ASF licenses this file to you under the Apache -# License, Version 2.0 (the "License"); you may not use this file -# except in compliance with the License. You may obtain a copy of -# the License at http://www.apache.org/licenses/LICENSE-2.0 . -# - -$ARGV0 = shift @ARGV; - -print < - - - - - - - - - - - - - - - - - - - - - - -EOF - - -# print the mapping -open ( NAMESPACES, $ARGV0 ) || die "can't open namespace file: $!"; -while ( ) -{ - chomp( $_ ); - # line format is: numeric-id short-name namespace-URL - $_ =~ /^([0-9]+)\s+([a-zA-Z][a-zA-Z0-9]*)\s+([a-zA-Z0-9-.:\/]+)\s*$/ or die "Error: invalid character in input data"; - print < - $1 - -EOF -} - -print < - - - -EOF -- cgit