diff options
author | Baole Fang <baole.fang@gmail.com> | 2023-04-27 14:16:18 -0400 |
---|---|---|
committer | Stephan Bergmann <sbergman@redhat.com> | 2023-05-03 07:55:02 +0200 |
commit | e8d7bf954fe74ce85bdd084d6e12d27027a4c379 (patch) | |
tree | eab59d3e6aeb6fa4aa5650b91abe731077f00142 /svl | |
parent | a21e151508918c39189a18d46fcba490100e5890 (diff) |
tdf#145925: Support AutoCapitalize in DOI recognition
Sometimes the first character of a DOI string is auto-capitalized (e.g. "Doi:10.1000/182"), so the string is not recognized as a DOI.
DOI detection now also recognizes a DOI string whose first character is capitalized,
as is already done in URL recognition.
Change-Id: I95334941dc4cda3095f1750fea927640dea55e23
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151142
Tested-by: Jenkins
Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
Diffstat (limited to 'svl')
-rw-r--r-- | svl/qa/unit/test_URIHelper.cxx | 5 | ||||
-rw-r--r-- | svl/source/misc/urihelper.cxx | 4 |
2 files changed, 7 insertions, 2 deletions
diff --git a/svl/qa/unit/test_URIHelper.cxx b/svl/qa/unit/test_URIHelper.cxx index df9e5d5114b8..33a08996cb58 100644 --- a/svl/qa/unit/test_URIHelper.cxx +++ b/svl/qa/unit/test_URIHelper.cxx @@ -410,6 +410,11 @@ void Test::testFindFirstDOIInText() { }; static Data const tests[] = { { "doi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with only digits + { "Doi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the the first three character being capitalized + { "DoI:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the the first three character being capitalized + { "DOI:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the the first three character being capitalized + { "dOI:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the the first three character being capitalized + { "dOi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the the first three character being capitalized { "doi:10.1038/nature03001", "https://doi.org/10.1038/nature03001", 0, 23 }, // valid doi suffix with alphanumeric characters { "doi:10.1093/ajae/aaq063", "https://doi.org/10.1093/ajae/aaq063", 0, 23 }, // valid doi suffix with multiple slash { "doi:10.1016/S0735-1097(98)00347-7", "https://doi.org/10.1016/S0735-1097(98)00347-7", 0, 33 }, // valid doi suffix with characters apart from alphanumeric diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx index 0043b7883a87..9aa78a584614 100644 --- a/svl/source/misc/urihelper.cxx +++ b/svl/source/misc/urihelper.cxx @@ -757,7 +757,7 @@ OUString URIHelper::FindFirstDOIInText(OUString const & rText, sal_Int32 count = rEnd-rBegin; OUString candidate(rText.subView(rBegin, count)); // Match with regex "doi:10\.\d{4,9}\/[-._;()\/:a-zA-Z0-9]+" - if (candidate.startsWith("doi:10.")) + if 
(candidate.startsWithIgnoreAsciiCase("doi:10.")) { bool flag = true; sal_Int32 digit = 0; @@ -797,7 +797,7 @@ OUString URIHelper::FindFirstDOIInText(OUString const & rText, } if (flag && digit==-1) { - return candidate.replaceFirst("doi:","https://doi.org/"); + return OUString::Concat("https://doi.org/")+candidate.subView(4); } } rBegin = rEnd; |