7 #include "fwGdcmIO/helper/Encoding.hpp" 11 #include <fwLog/Logger.hpp> 13 #include <boost/algorithm/string/classification.hpp> 14 #include <boost/algorithm/string/split.hpp> 15 #include <boost/assign/list_of.hpp> 16 #include <boost/foreach.hpp> 17 #include <boost/locale/encoding.hpp> 24 const Encoding::DefinedTermToCharsetMapType Encoding::s_DEFINED_TERM_TO_CHARSET = ::boost::assign::map_list_of
// Lookup table from a DICOM defined term to a charset name.
// convertStringWithoutCodeExtensions looks a term up here and passes the
// resulting charset name to ::boost::locale::conv::to_utf.
// Most charsets are listed twice, once under an ISO_IR n key and once under
// an ISO 2022 IR n key; both keys map to the same charset name.
30 (
"ISO_IR 100",
"ISO-8859-1")
31 (
"ISO 2022 IR 100",
"ISO-8859-1")
34 (
"ISO_IR 101",
"ISO-8859-2")
35 (
"ISO 2022 IR 101",
"ISO-8859-2")
38 (
"ISO_IR 109",
"ISO-8859-3")
39 (
"ISO 2022 IR 109",
"ISO-8859-3")
42 (
"ISO_IR 110",
"ISO-8859-4")
43 (
"ISO 2022 IR 110",
"ISO-8859-4")
46 (
"ISO_IR 144",
"ISO-8859-5")
47 (
"ISO 2022 IR 144",
"ISO-8859-5")
50 (
"ISO_IR 127",
"ISO-8859-6")
51 (
"ISO 2022 IR 127",
"ISO-8859-6")
54 (
"ISO_IR 126",
"ISO-8859-7")
55 (
"ISO 2022 IR 126",
"ISO-8859-7")
58 (
"ISO_IR 138",
"ISO-8859-8")
59 (
"ISO 2022 IR 138",
"ISO-8859-8")
62 (
"ISO_IR 148",
"ISO-8859-9")
63 (
"ISO 2022 IR 148",
"ISO-8859-9")
66 (
"ISO_IR 13",
"JIS_X0201")
67 (
"ISO 2022 IR 13",
"JIS_X0201")
70 (
"ISO_IR 166",
"ISO-IR-166")
71 (
"ISO 2022 IR 166",
"ISO-IR-166")
// The next four terms are only listed under their ISO 2022 IR n key.
74 (
"ISO 2022 IR 87",
"ISO-IR-87")
75 (
"ISO 2022 IR 159",
"ISO-IR-159")
79 (
"ISO 2022 IR 149",
"EUC-KR")
83 (
"ISO 2022 IR 58",
"GB2312")
// The last two terms are each listed under a single key.
86 (
"ISO_IR 192",
"UTF-8")
89 (
"GB18030",
"GB18030")
// Lookup table keyed by the first two bytes of a piece of text that followed
// an ESC (0x1b) character in the source string.
// convertSequenceWithCodeExtensions builds the key from sequence[0] and
// sequence[1] and reads the mapped value as a (defined term, charset name) pair.
95 const Encoding::EscapeSequenceToCharsetMapType Encoding::s_ESCAPE_SEQUENCE_TO_CHARSET = ::boost::assign::map_list_of
100 (std::make_pair(0x2d, 0x41),
106 (std::make_pair(0x2d, 0x42),
112 (std::make_pair(0x2d, 0x43),
118 (std::make_pair(0x2d, 0x44),
124 (std::make_pair(0x2d, 0x4c),
128 (std::make_pair(0x2d, 0x47),
132 (std::make_pair(0x2d, 0x46),
136 (std::make_pair(0x2d, 0x48),
140 (std::make_pair(0x2d, 0x4d),
146 (std::make_pair(0x29, 0x49),
153 (std::make_pair(0x28, 0x4a),
157 (std::make_pair(0x2d, 0x54),
161 (std::make_pair(0x24, 0x42),
171 const std::string& definedCharsetTerm,
172 const ::fwLog::Logger::sptr& logger)
// Converts source to UTF-8 according to definedCharsetTerm.
// definedCharsetTerm may carry several defined terms separated by backslashes;
// in that case source is processed piece by piece, split on ESC (0x1b).
// Warnings are forwarded to the helpers through logger.

// Split the charset declaration into its individual defined terms.
181 std::vector<std::string> definedTermList;
182 ::boost::split(definedTermList, definedCharsetTerm, ::boost::is_any_of(
"\\"));
// No term or exactly one term: the whole string uses one charset, so it is
// converted in a single call.
185 if(definedCharsetTerm.empty() || definedTermList.size() == 1)
187 return convertStringWithoutCodeExtensions(source, definedCharsetTerm, logger);
// Several terms with an empty first value: the first term is replaced by
// ISO 2022 IR 6.
194 if(definedTermList[0].empty())
196 definedTermList[0] =
"ISO 2022 IR 6";
// Cut the source at every ESC character. Each piece that followed an ESC
// starts with the bytes identifying the character set of that piece.
200 std::vector<std::string> sequenceList;
201 ::boost::split(sequenceList, source, ::boost::is_any_of(
"\033"));
// Source does not start with ESC: the leading piece carries no charset
// identifier and is converted with the first defined term.
206 if(source[0] !=
'\033')
208 result += convertStringWithoutCodeExtensions(sequenceList[0], definedTermList[0], logger);
// NOTE(review): when source starts with ESC, boost::split (token compression
// off by default) yields an empty first piece, and convertSequenceWithCodeExtensions
// rejects pieces shorter than two bytes - confirm this branch behaves as intended.
212 result += Encoding::convertSequenceWithCodeExtensions(sequenceList[0], definedTermList, logger);
// Every remaining piece followed an ESC and is converted with code extensions.
216 std::vector<std::string>::iterator it = ++sequenceList.begin();
217 for(; it != sequenceList.end(); ++it)
219 result += convertSequenceWithCodeExtensions(*it, definedTermList, logger);
// Converts source to UTF-8 using the charset associated with one defined term.
// source: raw string to convert.
// definedTerm: DICOM defined term naming the charset of source (may be empty).
// logger: receives a warning when definedTerm is unknown.
228 std::string Encoding::convertStringWithoutCodeExtensions(
const std::string& source,
229 const std::string& definedTerm,
230 const ::fwLog::Logger::sptr& logger)
// An empty defined term is handled as a separate case.
234 if (definedTerm.empty())
// ISO_IR 6 is not an accepted spelling: log that it is treated as an empty value.
240 SLM_WARN_IF(
"'ISO_IR 6' is not a defined term in DICOM, will be treated as an empty value (ASCII)",
241 definedTerm ==
"ISO_IR 6");
// Known term: take the charset name from the lookup table.
244 if(s_DEFINED_TERM_TO_CHARSET.find(definedTerm) != s_DEFINED_TERM_TO_CHARSET.end())
246 charset = s_DEFINED_TERM_TO_CHARSET.at(definedTerm);
// Unknown term: build the warning text and report it through the logger.
250 const std::string msg =
"'"+definedTerm+
"' is not a defined term in DICOM, " 251 "will be treated as an empty value (ASCII)";
256 logger->warning(msg);
// Decode source from the selected charset into UTF-8.
270 return ::boost::locale::conv::to_utf<char>(source, charset);
// Warns when definedTerm, the character set selected by an escape sequence,
// is absent from definedTermList, the terms declared in
// SpecificCharacterSet (0008,0005).
// The warning text is sent to logger.
277 void checkDefinedTermDeclaration(
const std::string& definedTerm,
278 const std::vector<std::string>& definedTermList,
279 const ::fwLog::Logger::sptr& logger)
// Linear search: the term was not declared when std::find reaches the end.
281 if(std::find(definedTermList.begin(), definedTermList.end(), definedTerm) == definedTermList.end())
283 const std::string msg =
"Escape sequence refers to character set '" + definedTerm
284 +
"' that was not declared in SpecificCharacterSet (0008,0005).";
289 logger->warning(msg);
// Converts one piece of text that followed an ESC character to UTF-8.
// sequence: starts with the bytes identifying the character set, followed by
//           the text encoded in that character set.
// definedTermList: defined terms declared for the string, used to check that
//                  the selected character set was declared.
// logger: receives the warning issued for an undeclared character set.
296 std::string Encoding::convertSequenceWithCodeExtensions(
const std::string& sequence,
297 const std::vector<std::string>& definedTermList,
298 const ::fwLog::Logger::sptr& logger)
// At least two identifier bytes are required.
302 FW_RAISE_IF(
"Cannot convert character set: Incomplete escape sequence.", sequence.size() < 2);
304 const char c1 = sequence[0];
305 const char c2 = sequence[1];
// Number of leading identifier bytes removed from sequence before conversion.
307 unsigned short escapeSize = 2;
// Result of the identification: (defined term, charset name), both empty until resolved.
309 EscapeSequenceType escapeSequence = std::make_pair(c1, c2);
310 DefinedTermAndCharsetPairType definedTermAndCharset = std::make_pair(
"",
"");
// Identifiers made of two bytes are resolved through the lookup table.
312 if(s_ESCAPE_SEQUENCE_TO_CHARSET.find(escapeSequence) != s_ESCAPE_SEQUENCE_TO_CHARSET.end())
314 definedTermAndCharset = s_ESCAPE_SEQUENCE_TO_CHARSET.at(escapeSequence);
// Prefix 0x24 0x28: a third byte is needed; 0x44 selects ISO 2022 IR 159.
316 else if ((c1 == 0x24) && (c2 == 0x28))
319 if(sequence.size() >= 3)
322 if (sequence[2] == 0x44)
324 definedTermAndCharset = std::make_pair(
"ISO 2022 IR 159",
"ISO-IR-159");
// Prefix 0x24 0x29: a third byte is needed; 0x43 selects ISO 2022 IR 149
// and 0x41 selects ISO 2022 IR 58.
328 else if ((c1 == 0x24) && (c2 == 0x29))
331 if(sequence.size() >= 3)
334 if (sequence[2] == 0x43)
337 definedTermAndCharset = std::make_pair(
"ISO 2022 IR 149",
"EUC-KR");
339 else if (sequence[2] == 0x41)
342 definedTermAndCharset = std::make_pair(
"ISO 2022 IR 58",
"GB2312");
// No branch identified a character set: the defined term is still empty.
348 FW_RAISE_IF(
"Unable to retrieve character set from escape sequence.", definedTermAndCharset.first.empty());
// Warn when the selected character set was not declared for this string.
351 checkDefinedTermDeclaration(definedTermAndCharset.first, definedTermList, logger);
// Empty charset name: return the text after the identifier bytes unchanged.
354 if(definedTermAndCharset.second.empty())
356 return sequence.substr(escapeSize);
// Otherwise decode the text after the identifier bytes into UTF-8.
360 return ::boost::locale::conv::to_utf<char>(sequence.substr(escapeSize), definedTermAndCharset.second);
The namespace fwGdcmIO contains readers, writers and helpers for DICOM data.
This file defines SpyLog macros. These macros are used to log messages to a file or to the console du...
static FWGDCMIO_API std::string convertString(const std::string &source, const std::string &definedCharsetTerm, const std::shared_ptr< ::fwLog::Logger > &logger=nullptr)
Convert a DICOM string from the specified charset to UTF-8.
#define SLM_WARN_IF(message, cond)