Verified commit e4057833, authored by Dave Long
Browse files

fix: #3549107 Escape or strip control characters in JSON:API

By: dries
By: scottatdrake
By: bbrala
(cherry picked from commit 4fa6d7352f6ec954bb331d7fc5fcd5299fb57c7b)
parent a01dfd32
Loading
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -527,6 +527,7 @@ trgm
truecolor
twistie
twocol
ufffd
uids
unban
uncacheable
+70 −0
Original line number Diff line number Diff line
<?php

declare(strict_types=1);

namespace Drupal\Tests\jsonapi\Unit\Encoder;

use Drupal\jsonapi\Encoder\JsonEncoder;
use Drupal\Tests\UnitTestCase;
use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\Attributes\Group;

/**
 * Unit tests for the JSON:API JSON encoder.
 *
 * @internal
 */
#[CoversClass(JsonEncoder::class)]
#[Group('jsonapi')]
class JsonEncoderTest extends UnitTestCase {

  /**
   * Encoder instance exercised by each test; rebuilt in setUp().
   */
  protected JsonEncoder $encoder;

  /**
   * {@inheritdoc}
   */
  protected function setUp(): void {
    parent::setUp();
    $this->encoder = new JsonEncoder();
  }

  /**
   * Tests which format names supportsEncoding() accepts.
   *
   * Only 'api_json' is expected to be supported; 'json' and 'xml' are not.
   */
  public function testSupportsEncoding(): void {
    $this->assertTrue($this->encoder->supportsEncoding('api_json'));

    foreach (['json', 'xml'] as $unsupported_format) {
      $this->assertFalse($this->encoder->supportsEncoding($unsupported_format));
    }
  }

  /**
   * Tests that invalid UTF-8 is substituted rather than breaking encoding.
   *
   * The JSON:API encoder is expected to pick up the
   * JSON_INVALID_UTF8_SUBSTITUTE flag from the serialization module's encoder
   * it extends. This is only a smoke test of that inheritance; the thorough
   * coverage lives in the serialization module's JsonEncoderTest.
   *
   * @see \Drupal\Tests\serialization\Unit\Encoder\JsonEncoderTest
   * @see https://www.drupal.org/project/drupal/issues/3549107
   */
  public function testInheritsControlCharacterHandling(): void {
    // A lone 0x80 byte is not valid UTF-8, so encoding this string would fail
    // if the substitute flag were missing.
    $json = $this->encoder->encode("Test\x80Data", 'api_json');

    // The output must parse as JSON.
    $this->assertJson($json, 'Encoded output should be valid JSON even with invalid UTF-8.');

    // The bad byte must have been swapped for the escaped replacement
    // character.
    $this->assertStringContainsString('\ufffd', $json, 'Invalid UTF-8 should be replaced with U+FFFD.');

    // Decoding must not leave an error behind.
    json_decode($json);
    $this->assertSame(JSON_ERROR_NONE, json_last_error(), 'Encoded JSON should be decodable without errors.');
  }

}
+4 −1
Original line number Diff line number Diff line
@@ -29,7 +29,10 @@ public function __construct(?JsonEncode $encodingImpl = NULL, ?JsonDecode $decod
    // Encode <, >, ', &, and " for RFC4627-compliant JSON, which may also be
    // embedded into HTML.
    // @see \Symfony\Component\HttpFoundation\JsonResponse
    $json_encoding_options = JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_AMP | JSON_HEX_QUOT;
    // Additionally, substitute invalid UTF-8 sequences to ensure control
    // characters and malformed data are properly handled.
    // @see https://www.drupal.org/project/drupal/issues/3549107
    $json_encoding_options = JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_AMP | JSON_HEX_QUOT | JSON_INVALID_UTF8_SUBSTITUTE;
    $this->encodingImpl = $encodingImpl ?: new JsonEncode([JsonEncode::OPTIONS => $json_encoding_options]);
    $this->decodingImpl = $decodingImpl ?: new JsonDecode([JsonDecode::ASSOCIATIVE => TRUE]);
  }
+85 −4
Original line number Diff line number Diff line
@@ -16,15 +16,96 @@
#[Group('serialization')]
class JsonEncoderTest extends UnitTestCase {

  /**
   * The encoder under test.
   *
   * A fresh instance is created in setUp() before every test method.
   */
  protected JsonEncoder $encoder;

  /**
   * {@inheritdoc}
   */
  protected function setUp(): void {
    parent::setUp();
    $this->encoder = new JsonEncoder();
  }

  /**
   * Tests the supportsEncoding() method.
   *
   * The 'json' and 'ajax' formats are expected to be supported, 'xml' is not.
   */
  public function testSupportsEncoding(): void {
    $this->assertTrue($this->encoder->supportsEncoding('json'));
    $this->assertTrue($this->encoder->supportsEncoding('ajax'));
    $this->assertFalse($this->encoder->supportsEncoding('xml'));
  }

  /**
   * Tests that invalid UTF-8 is handled via JSON_INVALID_UTF8_SUBSTITUTE.
   *
   * @see https://www.drupal.org/project/drupal/issues/3549107
   */
  public function testEncodeInvalidUtf8IsSubstituted(): void {
    // A representative invalid UTF-8 sequence that would previously cause
    // json_encode() to fail.
    $input = "Test\x80Data";
    $encoded = $this->encoder->encode($input, 'json');

    // Verify it's valid JSON (would fail without the flag).
    $this->assertJson($encoded, 'Encoded output should be valid JSON even with invalid UTF-8.');

    // Verify the replacement character is present, in its escaped form.
    $this->assertStringContainsString('\\ufffd', $encoded, 'Invalid UTF-8 should be replaced with U+FFFD.');

    // Verify it can be decoded.
    json_decode($encoded);
    $this->assertSame(JSON_ERROR_NONE, json_last_error(), 'Encoded JSON should be decodable without errors.');
  }

  /**
   * Tests that HTML-unsafe characters are still escaped.
   *
   * This ensures that existing JSON_HEX_* behavior is preserved after adding
   * JSON_INVALID_UTF8_SUBSTITUTE.
   */
  public function testHtmlUnsafeCharactersAreEscaped(): void {
    // Contains every character covered by the JSON_HEX_* flags: <, >, ', &
    // and ".
    $input = "<script>alert('test & \"hack\"');</script>";
    $encoded = $this->encoder->encode($input, 'json');

    // Verify it's valid JSON.
    $this->assertJson($encoded, 'HTML-unsafe characters should produce valid JSON.');

    // Verify HTML-unsafe characters are escaped as hex codes.
    $this->assertStringContainsString('\\u003C', $encoded, '< should be escaped to \\u003C.');
    $this->assertStringContainsString('\\u003E', $encoded, '> should be escaped to \\u003E.');
    $this->assertStringContainsString('\\u0027', $encoded, "' should be escaped to \\u0027.");
    $this->assertStringContainsString('\\u0026', $encoded, '& should be escaped to \\u0026.');
    $this->assertStringContainsString('\\u0022', $encoded, '" should be escaped to \\u0022.');
  }

  /**
   * Simple structured data smoke test.
   *
   * This verifies that the encoder works for nested arrays and that invalid
   * UTF-8 inside the structure is still handled correctly.
   */
  public function testStructuredDataSmokeTest(): void {
    $data = [
      'title' => 'Example',
      'body' => "Content with invalid UTF-8: \x80",
      'metadata' => [
        'tags' => ['one', 'two'],
      ],
    ];

    $encoded = $this->encoder->encode($data, 'json');

    $this->assertJson($encoded, 'Structured data should produce valid JSON.');
    $this->assertStringContainsString('\\ufffd', $encoded, 'Invalid UTF-8 in nested data should be replaced.');

    // Decode to an associative array so the top-level keys can be checked.
    $decoded = json_decode($encoded, TRUE);
    $this->assertSame(JSON_ERROR_NONE, json_last_error(), 'Structured data should be decodable.');
    $this->assertIsArray($decoded, 'Decoded data should be an array.');
    $this->assertArrayHasKey('title', $decoded, 'Decoded data should have title key.');
    $this->assertArrayHasKey('metadata', $decoded, 'Decoded data should have metadata key.');
  }

}