From e8d7bf954fe74ce85bdd084d6e12d27027a4c379 Mon Sep 17 00:00:00 2001 From: Baole Fang Date: Thu, 27 Apr 2023 14:16:18 -0400 Subject: tdf#145925: Support AutoCapitalize in DOI recognition Sometimes the first character of a DOI string is auto-capitalized, so the string is not recognized as a DOI. DOI detection now matches the "doi:" prefix case-insensitively, so such strings are recognized, as is already done in URL recognition. Change-Id: I95334941dc4cda3095f1750fea927640dea55e23 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151142 Tested-by: Jenkins Reviewed-by: Stephan Bergmann --- svl/qa/unit/test_URIHelper.cxx | 5 +++++ svl/source/misc/urihelper.cxx | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'svl') diff --git a/svl/qa/unit/test_URIHelper.cxx b/svl/qa/unit/test_URIHelper.cxx index df9e5d5114b8..33a08996cb58 100644 --- a/svl/qa/unit/test_URIHelper.cxx +++ b/svl/qa/unit/test_URIHelper.cxx @@ -410,6 +410,11 @@ void Test::testFindFirstDOIInText() { }; static Data const tests[] = { { "doi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with only digits + { "Doi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the first three characters being capitalized + { "DoI:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the first three characters being capitalized + { "DOI:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the first three characters being capitalized + { "dOI:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the first three characters being capitalized + { "dOi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the first three characters being capitalized { "doi:10.1038/nature03001", "https://doi.org/10.1038/nature03001", 0, 23 }, // valid doi suffix with alphanumeric characters { "doi:10.1093/ajae/aaq063", 
"https://doi.org/10.1093/ajae/aaq063", 0, 23 }, // valid doi suffix with multiple slash { "doi:10.1016/S0735-1097(98)00347-7", "https://doi.org/10.1016/S0735-1097(98)00347-7", 0, 33 }, // valid doi suffix with characters apart from alphanumeric diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx index 0043b7883a87..9aa78a584614 100644 --- a/svl/source/misc/urihelper.cxx +++ b/svl/source/misc/urihelper.cxx @@ -757,7 +757,7 @@ OUString URIHelper::FindFirstDOIInText(OUString const & rText, sal_Int32 count = rEnd-rBegin; OUString candidate(rText.subView(rBegin, count)); // Match with regex "doi:10\.\d{4,9}\/[-._;()\/:a-zA-Z0-9]+" - if (candidate.startsWith("doi:10.")) + if (candidate.startsWithIgnoreAsciiCase("doi:10.")) { bool flag = true; sal_Int32 digit = 0; @@ -797,7 +797,7 @@ OUString URIHelper::FindFirstDOIInText(OUString const & rText, } if (flag && digit==-1) { - return candidate.replaceFirst("doi:","https://doi.org/"); + return OUString::Concat("https://doi.org/")+candidate.subView(4); } } rBegin = rEnd; -- cgit