summary | refs | log | tree | commit | diff
path: root/compilerplugins/clang
diff options
context:
space:
mode:
Diffstat (limited to 'compilerplugins/clang')
-rw-r--r-- compilerplugins/clang/stringliteralvar.cxx 64
-rw-r--r-- compilerplugins/clang/test/stringliteralvar.cxx 21
2 files changed, 85 insertions, 0 deletions
diff --git a/compilerplugins/clang/stringliteralvar.cxx b/compilerplugins/clang/stringliteralvar.cxx
index 5ace384f1e16..fcd3690669e7 100644
--- a/compilerplugins/clang/stringliteralvar.cxx
+++ b/compilerplugins/clang/stringliteralvar.cxx
@@ -28,6 +28,7 @@
#include <cassert>
#include "check.hxx"
+#include "compat.hxx"
#include "plugin.hxx"
namespace
@@ -137,6 +138,10 @@ public:
return true;
}
auto const d = e1->getDecl();
+ if (isPotentiallyInitializedWithMalformedUtf16(d))
+ {
+ return true;
+ }
if (!reportedArray_.insert(d).second)
{
return true;
@@ -188,6 +193,10 @@ public:
return true;
}
auto const d = e->getDecl();
+ if (isPotentiallyInitializedWithMalformedUtf16(d))
+ {
+ return true;
+ }
if (!reportedArray_.insert(d).second)
{
return true;
@@ -246,6 +255,61 @@ private:
}
}
+ // There is some confusion on the semantics of numeric-escape-sequences in string literals, see
+ // <https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2029r4.html> "Proposed resolution
+ // for core issues 411, 1656, and 2333; numeric and universal character escapes in character and
+ // string literals", so suppress warnings about arrays that are deliberately not written as
+ // UTF-16 string literals because they contain lone surrogates:
+ bool isPotentiallyInitializedWithMalformedUtf16(ValueDecl const* decl) const // true: callers skip the warning for decl
+ {
+ if (!decl->getType()->getArrayElementTypeNoTypeQual()->isChar16Type()) // NOTE(review): presumes decl has array type -- confirm at call sites
+ {
+ return false;
+ }
+ auto const init = cast<VarDecl>(decl)->getAnyInitializer(); // NOTE(review): cast<> presumes decl is a VarDecl -- confirm at call sites
+ if (init == nullptr) // no initializer available, so the content cannot be checked
+ {
+ return true;
+ }
+ auto const list = dyn_cast<InitListExpr>(init);
+ if (list == nullptr) // initializer is not a braced list of elements
+ {
+ // The initializer is not an initializer list, so it is assumed to already be a string
+ // literal, and that string literal is assumed to have no issues with malformed UTF-16:
+ if (isDebugMode()) // the string-literal assumption is only asserted in debug mode
+ {
+ assert(isa<clang::StringLiteral>(init));
+ }
+ return false;
+ }
+ auto highSurrogate = false; // true while the previous element was a high surrogate awaiting its low surrogate
+ for (auto const e : list->inits())
+ {
+ llvm::APSInt v;
+ if (!compat::EvaluateAsInt(e, v, compiler.getASTContext())) // presumably fails for elements that are not integer constants -- TODO confirm
+ {
+ return true; // element value unknown, so treat the array as potentially malformed
+ }
+ if (highSurrogate)
+ {
+ if (v < 0xDC00 || v > 0xDFFF) // high surrogate not followed by a low surrogate (0xDC00..0xDFFF)
+ {
+ return true;
+ }
+ highSurrogate = false; // surrogate pair completed
+ }
+ else if (v >= 0xD800 && v <= 0xDBFF) // high surrogate: the next element must be a low surrogate
+ {
+ highSurrogate = true;
+ }
+ else if (v >= 0xDC00 && v <= 0xDFFF) // low surrogate without a preceding high surrogate
+ {
+ return true;
+ }
+ }
+ return highSurrogate; // a high surrogate as the last element is unpaired
+ }
+
std::set<Decl const*> reportedAutomatic_;
std::set<Decl const*> reportedArray_;
};
diff --git a/compilerplugins/clang/test/stringliteralvar.cxx b/compilerplugins/clang/test/stringliteralvar.cxx
index 3c0eaaccae04..d0fdcedb0668 100644
--- a/compilerplugins/clang/test/stringliteralvar.cxx
+++ b/compilerplugins/clang/test/stringliteralvar.cxx
@@ -110,4 +110,25 @@ void f11(int nStreamType)
(void)sStreamType;
}
+extern sal_Unicode const extarr[1]; // declared without an initializer; passed to OUString in f12
+
+sal_Unicode init(); // declared only; f12 uses its result as an array element
+
+void f12() // cases for arrays holding surrogate code units: three without a warning, one with
+{
+ // Suppress warnings if the array contains a malformed sequence of UTF-16 code units...:
+ static sal_Unicode const arr1[] = { 0xD800 }; // a single high surrogate with nothing following it
+ f(OUString(arr1, 1));
+ // ...Or potentially contains a malformed sequence of UTF-16 code units...:
+ f(OUString(extarr, 1)); // extarr has no initializer visible in this file
+ sal_Unicode const arr2[] = { init() }; // element comes from a function call rather than a literal
+ f(OUString(arr2, 1));
+ // ...But generate a warning if the array contains a well-formed sequence of UTF-16 code units
+ // containing surrogates:
+ // expected-error-re@+1 {{change type of variable 'arr3' from constant character array ('const sal_Unicode{{ ?}}[2]'{{( \(aka 'const char16_t\[2\]'\))?}}) to OUStringLiteral [loplugin:stringliteralvar]}}
+ static sal_Unicode const arr3[] = { 0xD800, 0xDC00 }; // high surrogate followed by a low surrogate
+ // expected-note-re@+1 {{first passed into a '{{(rtl::)?}}OUString' constructor here [loplugin:stringliteralvar]}}
+ f(OUString(arr3, 2));
+}
+
/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */