summaryrefslogtreecommitdiff
path: root/svl
diff options
context:
space:
mode:
authorBaole Fang <baole.fang@gmail.com>2023-04-27 14:16:18 -0400
committerStephan Bergmann <sbergman@redhat.com>2023-05-03 07:55:02 +0200
commite8d7bf954fe74ce85bdd084d6e12d27027a4c379 (patch)
treeeab59d3e6aeb6fa4aa5650b91abe731077f00142 /svl
parenta21e151508918c39189a18d46fcba490100e5890 (diff)
tdf#145925: Support AutoCapitalize in DOI recognition
Sometimes the first character of a DOI string is auto-capitalized, and the resulting string is not recognized as a DOI. DOI detection now also recognizes a DOI string whose "doi:" prefix is capitalized, matching what is already done in URL recognition. Change-Id: I95334941dc4cda3095f1750fea927640dea55e23 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151142 Tested-by: Jenkins Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
Diffstat (limited to 'svl')
-rw-r--r--svl/qa/unit/test_URIHelper.cxx5
-rw-r--r--svl/source/misc/urihelper.cxx4
2 files changed, 7 insertions, 2 deletions
diff --git a/svl/qa/unit/test_URIHelper.cxx b/svl/qa/unit/test_URIHelper.cxx
index df9e5d5114b8..33a08996cb58 100644
--- a/svl/qa/unit/test_URIHelper.cxx
+++ b/svl/qa/unit/test_URIHelper.cxx
@@ -410,6 +410,11 @@ void Test::testFindFirstDOIInText() {
};
static Data const tests[] = {
{ "doi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with only digits
+ { "Doi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the first three characters being capitalized
+ { "DoI:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the first three characters being capitalized
+ { "DOI:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the first three characters being capitalized
+ { "dOI:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the first three characters being capitalized
+ { "dOi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the first three characters being capitalized
{ "doi:10.1038/nature03001", "https://doi.org/10.1038/nature03001", 0, 23 }, // valid doi suffix with alphanumeric characters
{ "doi:10.1093/ajae/aaq063", "https://doi.org/10.1093/ajae/aaq063", 0, 23 }, // valid doi suffix with multiple slash
{ "doi:10.1016/S0735-1097(98)00347-7", "https://doi.org/10.1016/S0735-1097(98)00347-7", 0, 33 }, // valid doi suffix with characters apart from alphanumeric
diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx
index 0043b7883a87..9aa78a584614 100644
--- a/svl/source/misc/urihelper.cxx
+++ b/svl/source/misc/urihelper.cxx
@@ -757,7 +757,7 @@ OUString URIHelper::FindFirstDOIInText(OUString const & rText,
sal_Int32 count = rEnd-rBegin;
OUString candidate(rText.subView(rBegin, count));
// Match with regex "doi:10\.\d{4,9}\/[-._;()\/:a-zA-Z0-9]+"
- if (candidate.startsWith("doi:10."))
+ if (candidate.startsWithIgnoreAsciiCase("doi:10."))
{
bool flag = true;
sal_Int32 digit = 0;
@@ -797,7 +797,7 @@ OUString URIHelper::FindFirstDOIInText(OUString const & rText,
}
if (flag && digit==-1)
{
- return candidate.replaceFirst("doi:","https://doi.org/");
+ return OUString::Concat("https://doi.org/")+candidate.subView(4);
}
}
rBegin = rEnd;