diff options
Diffstat (limited to 'compilerplugins/clang')
-rw-r--r-- | compilerplugins/clang/stringliteralvar.cxx | 64 | ||||
-rw-r--r-- | compilerplugins/clang/test/stringliteralvar.cxx | 21 |
2 files changed, 85 insertions, 0 deletions
diff --git a/compilerplugins/clang/stringliteralvar.cxx b/compilerplugins/clang/stringliteralvar.cxx
index 5ace384f1e16..fcd3690669e7 100644
--- a/compilerplugins/clang/stringliteralvar.cxx
+++ b/compilerplugins/clang/stringliteralvar.cxx
@@ -28,6 +28,7 @@
 #include <cassert>
 
 #include "check.hxx"
+#include "compat.hxx"
 #include "plugin.hxx"
 
 namespace
@@ -137,6 +138,10 @@ public:
             return true;
         }
         auto const d = e1->getDecl();
+        if (isPotentiallyInitializedWithMalformedUtf16(d))
+        {
+            return true;
+        }
         if (!reportedArray_.insert(d).second)
         {
             return true;
@@ -188,6 +193,10 @@ public:
             return true;
         }
         auto const d = e->getDecl();
+        if (isPotentiallyInitializedWithMalformedUtf16(d))
+        {
+            return true;
+        }
         if (!reportedArray_.insert(d).second)
         {
             return true;
@@ -246,6 +255,61 @@ private:
         }
     }
 
+    // There is some confusion on the semantics of numeric-escape-sequences in string literals, see
+    // <https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2029r4.html> "Proposed resolution
+    // for core issues 411, 1656, and 2333; numeric and universal character escapes in character and
+    // string literals", so suppress warnings about arrays that are deliberately not written as
+    // UTF-16 string literals because they contain lone surrogates:
+    bool isPotentiallyInitializedWithMalformedUtf16(ValueDecl const* decl) const
+    {
+        if (!decl->getType()->getArrayElementTypeNoTypeQual()->isChar16Type())
+        {
+            return false;
+        }
+        auto const init = cast<VarDecl>(decl)->getAnyInitializer();
+        if (init == nullptr)
+        {
+            return true;
+        }
+        auto const list = dyn_cast<InitListExpr>(init);
+        if (list == nullptr)
+        {
+            // A non-list initializer is expected to be a string literal (asserted in debug mode);
+            // such a literal is taken to be free of malformed UTF-16:
+            if (isDebugMode())
+            {
+                assert(isa<clang::StringLiteral>(init));
+            }
+            return false;
+        }
+        auto highSurrogate = false;
+        for (auto const e : list->inits())
+        {
+            llvm::APSInt v;
+            if (!compat::EvaluateAsInt(e, v, compiler.getASTContext()))
+            {
+                return true;
+            }
+            if (highSurrogate)
+            {
+                if (v < 0xDC00 || v > 0xDFFF)
+                {
+                    return true;
+                }
+                highSurrogate = false;
+            }
+            else if (v >= 0xD800 && v <= 0xDBFF)
+            {
+                highSurrogate = true;
+            }
+            else if (v >= 0xDC00 && v <= 0xDFFF)
+            {
+                return true;
+            }
+        }
+        return highSurrogate;
+    }
+
     std::set<Decl const*> reportedAutomatic_;
     std::set<Decl const*> reportedArray_;
 };
diff --git a/compilerplugins/clang/test/stringliteralvar.cxx b/compilerplugins/clang/test/stringliteralvar.cxx
index 3c0eaaccae04..d0fdcedb0668 100644
--- a/compilerplugins/clang/test/stringliteralvar.cxx
+++ b/compilerplugins/clang/test/stringliteralvar.cxx
@@ -110,4 +110,25 @@ void f11(int nStreamType)
     (void)sStreamType;
 }
 
+extern sal_Unicode const extarr[1];
+
+sal_Unicode init();
+
+void f12()
+{
+    // Suppress warnings if the array contains a malformed sequence of UTF-16 code units...:
+    static sal_Unicode const arr1[] = { 0xD800 };
+    f(OUString(arr1, 1));
+    // ...Or potentially contains a malformed sequence of UTF-16 code units...:
+    f(OUString(extarr, 1));
+    sal_Unicode const arr2[] = { init() };
+    f(OUString(arr2, 1));
+    // ...But generate a warning if the array contains a well-formed sequence of UTF-16 code units
+    // containing surrogates:
+    // expected-error-re@+1 {{change type of variable 'arr3' from constant character array ('const sal_Unicode{{ ?}}[2]'{{( \(aka 'const char16_t\[2\]'\))?}}) to OUStringLiteral [loplugin:stringliteralvar]}}
+    static sal_Unicode const arr3[] = { 0xD800, 0xDC00 };
+    // expected-note-re@+1 {{first passed into a '{{(rtl::)?}}OUString' constructor here [loplugin:stringliteralvar]}}
+    f(OUString(arr3, 2));
+}
+
 /* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
|