Utf8.php 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. <?php
  2. /**
  3. * CodeIgniter
  4. *
  5. * An open source application development framework for PHP
  6. *
  7. * This content is released under the MIT License (MIT)
  8. *
  9. * Copyright (c) 2014 - 2017, British Columbia Institute of Technology
  10. *
  11. * Permission is hereby granted, free of charge, to any person obtaining a copy
  12. * of this software and associated documentation files (the "Software"), to deal
  13. * in the Software without restriction, including without limitation the rights
  14. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  15. * copies of the Software, and to permit persons to whom the Software is
  16. * furnished to do so, subject to the following conditions:
  17. *
  18. * The above copyright notice and this permission notice shall be included in
  19. * all copies or substantial portions of the Software.
  20. *
  21. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  22. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  23. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  24. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  25. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  26. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  27. * THE SOFTWARE.
  28. *
  29. * @package CodeIgniter
  30. * @author EllisLab Dev Team
  31. * @copyright Copyright (c) 2008 - 2014, EllisLab, Inc. (https://ellislab.com/)
  32. * @copyright Copyright (c) 2014 - 2017, British Columbia Institute of Technology (http://bcit.ca/)
  33. * @license http://opensource.org/licenses/MIT MIT License
  34. * @link https://codeigniter.com
  35. * @since Version 2.0.0
  36. * @filesource
  37. */
  38. defined('BASEPATH') OR exit('No direct script access allowed');
  39. /**
  40. * Utf8 Class
  41. *
  42. * Provides support for UTF-8 environments
  43. *
  44. * @package CodeIgniter
  45. * @subpackage Libraries
  46. * @category UTF-8
  47. * @author EllisLab Dev Team
  48. * @link https://codeigniter.com/user_guide/libraries/utf8.html
  49. */
  50. class CI_Utf8 {
  51. /**
  52. * Class constructor
  53. *
  54. * Determines if UTF-8 support is to be enabled.
  55. *
  56. * @return void
  57. */
  58. public function __construct()
  59. {
  60. if (
  61. defined('PREG_BAD_UTF8_ERROR') // PCRE must support UTF-8
  62. && (ICONV_ENABLED === TRUE OR MB_ENABLED === TRUE) // iconv or mbstring must be installed
  63. && strtoupper(config_item('charset')) === 'UTF-8' // Application charset must be UTF-8
  64. )
  65. {
  66. define('UTF8_ENABLED', TRUE);
  67. log_message('debug', 'UTF-8 Support Enabled');
  68. }
  69. else
  70. {
  71. define('UTF8_ENABLED', FALSE);
  72. log_message('debug', 'UTF-8 Support Disabled');
  73. }
  74. log_message('info', 'Utf8 Class Initialized');
  75. }
  76. // --------------------------------------------------------------------
  77. /**
  78. * Clean UTF-8 strings
  79. *
  80. * Ensures strings contain only valid UTF-8 characters.
  81. *
  82. * @param string $str String to clean
  83. * @return string
  84. */
  85. public function clean_string($str)
  86. {
  87. if ($this->is_ascii($str) === FALSE)
  88. {
  89. if (MB_ENABLED)
  90. {
  91. $str = mb_convert_encoding($str, 'UTF-8', 'UTF-8');
  92. }
  93. elseif (ICONV_ENABLED)
  94. {
  95. $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
  96. }
  97. }
  98. return $str;
  99. }
  100. // --------------------------------------------------------------------
  101. /**
  102. * Remove ASCII control characters
  103. *
  104. * Removes all ASCII control characters except horizontal tabs,
  105. * line feeds, and carriage returns, as all others can cause
  106. * problems in XML.
  107. *
  108. * @param string $str String to clean
  109. * @return string
  110. */
  111. public function safe_ascii_for_xml($str)
  112. {
  113. return remove_invisible_characters($str, FALSE);
  114. }
  115. // --------------------------------------------------------------------
  116. /**
  117. * Convert to UTF-8
  118. *
  119. * Attempts to convert a string to UTF-8.
  120. *
  121. * @param string $str Input string
  122. * @param string $encoding Input encoding
  123. * @return string $str encoded in UTF-8 or FALSE on failure
  124. */
  125. public function convert_to_utf8($str, $encoding)
  126. {
  127. if (MB_ENABLED)
  128. {
  129. return mb_convert_encoding($str, 'UTF-8', $encoding);
  130. }
  131. elseif (ICONV_ENABLED)
  132. {
  133. return @iconv($encoding, 'UTF-8', $str);
  134. }
  135. return FALSE;
  136. }
  137. // --------------------------------------------------------------------
  138. /**
  139. * Is ASCII?
  140. *
  141. * Tests if a string is standard 7-bit ASCII or not.
  142. *
  143. * @param string $str String to check
  144. * @return bool
  145. */
  146. public function is_ascii($str)
  147. {
  148. return (preg_match('/[^\x00-\x7F]/S', $str) === 0);
  149. }
  150. }