Skip to content

Commit

Permalink
Writer/Xls/Parser::advance Should Parse by Character
Browse files Browse the repository at this point in the history
It currently parsed by byte, which is not a good thing in a UTF-8 system. See the discussion at the bottom of PR #4203. That turns out to not be the change which led to this problem; that would have been PR #4323. That change came about because using Composer/Pcre revealed bugs in several regular expressions used in Writer/Xls/Parser. This ticket comes about because more bugs were revealed in the same module. The problem is that the `advance` method needs to process formulas character by character, but is instead doing it byte by byte. It is changed to advance by characters, and tests for non-ASCII characters are added.
  • Loading branch information
oleibman committed Feb 7, 2025
1 parent eafbed6 commit ac34f58
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
### Fixed

- Xls writer Parser Mishandling True/False Argument. [Issue #4331](https://github.com/PHPOffice/PhpSpreadsheet/issues/4331) [PR #4333](https://github.com/PHPOffice/PhpSpreadsheet/pull/4333)
- Xls writer Parser Parse By Character Not Byte. [PR #4344](https://github.com/PHPOffice/PhpSpreadsheet/pull/4344)

## 2025-01-26 - 3.9.0

Expand Down
21 changes: 12 additions & 9 deletions src/PhpSpreadsheet/Writer/Xls/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class Parser
. '[$]?[A-Ia-i]?[A-Za-z][$]?(\\d+)'
. '$~u';

private const UTF8 = 'UTF-8';

/**
* The index of the character we are currently looking at.
*/
Expand Down Expand Up @@ -991,37 +993,38 @@ private function advance(): void
{
$token = '';
$i = $this->currentCharacter;
$formula_length = strlen($this->formula);
$formula = mb_str_split($this->formula, 1, self::UTF8);
$formula_length = count($formula);
// eat up white spaces
if ($i < $formula_length) {
while ($this->formula[$i] == ' ') {
while ($formula[$i] === ' ') {
++$i;
}

if ($i < ($formula_length - 1)) {
$this->lookAhead = $this->formula[$i + 1];
$this->lookAhead = $formula[$i + 1];
}
$token = '';
}

while ($i < $formula_length) {
$token .= $this->formula[$i];
$token .= $formula[$i];

if ($i < ($formula_length - 1)) {
$this->lookAhead = $this->formula[$i + 1];
$this->lookAhead = $formula[$i + 1];
} else {
$this->lookAhead = '';
}

if ($this->match($token) != '') {
if ($this->match($token) !== '') {
$this->currentCharacter = $i + 1;
$this->currentToken = $token;

return;
}

if ($i < ($formula_length - 2)) {
$this->lookAhead = $this->formula[$i + 2];
$this->lookAhead = $formula[$i + 2];
} else { // if we run out of characters lookAhead becomes empty
$this->lookAhead = '';
}
Expand Down Expand Up @@ -1198,8 +1201,8 @@ private function match(string $token): string
public function parse(string $formula): bool
{
$this->currentCharacter = 0;
$this->formula = (string) $formula;
$this->lookAhead = $formula[1] ?? '';
$this->formula = $formula;
$this->lookAhead = mb_substr($formula, 1, 1, self::UTF8);
$this->advance();
$this->parseTree = $this->condition();

Expand Down
65 changes: 65 additions & 0 deletions tests/PhpSpreadsheetTests/Writer/Xls/NonLatinFormulasTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
<?php

namespace PhpOffice\PhpSpreadsheetTests\Writer\Xls;

use PhpOffice\PhpSpreadsheet\Cell\DataValidation;
use PhpOffice\PhpSpreadsheet\Cell\DataValidator;
use PhpOffice\PhpSpreadsheet\NamedRange;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheetTests\Functional\AbstractFunctional;

class NonLatinFormulasTest extends AbstractFunctional
{
public function testNonLatin(): void
{
$spreadsheet = new Spreadsheet();
$worksheet = $spreadsheet->getActiveSheet();

$validation = $worksheet->getCell('B1')->getDataValidation();
$validation->setType(DataValidation::TYPE_LIST);
$validation->setErrorStyle(DataValidation::STYLE_STOP);
$validation->setAllowBlank(false);
$validation->setShowInputMessage(true);
$validation->setShowErrorMessage(true);
$validation->setShowDropDown(true);
$validation->setFormula1('"слово, сло"');

$dataValidator = new DataValidator();
$worksheet->getCell('B1')->setValue('слово');
self::assertTrue(
$dataValidator->isValid($worksheet->getCell('B1'))
);
$worksheet->getCell('B1')->setValue('слов');
self::assertFalse(
$dataValidator->isValid($worksheet->getCell('B1'))
);

$worksheet->setTitle('словслов');
$worksheet->getCell('A1')->setValue('=словслов!B1');
$worksheet->getCell('A2')->setValue("='словслов'!B1");
$spreadsheet->addNamedRange(new NamedRange('слсл', $worksheet, '$B$1'));
$worksheet->getCell('A3')->setValue('=слсл');

$robj = $this->writeAndReload($spreadsheet, 'Xls');
$spreadsheet->disconnectWorksheets();
$sheet0 = $robj->getActiveSheet();
self::assertSame('словслов', $sheet0->getTitle());
self::assertSame('=словслов!B1', $sheet0->getCell('A1')->getValue());
self::assertSame('слов', $sheet0->getCell('A1')->getCalculatedValue());
// Quotes around sheet name are stripped off - harmless
//self::assertSame("='словслов'!B1", $sheet0->getCell('A2')->getValue());
self::assertSame('слов', $sheet0->getCell('A2')->getCalculatedValue());
// Formulas with defined names don't work in Xls Writer
//self::assertSame('=слсл', $sheet0->getCell('A3')->getValue());
// But result should be accurate
self::assertSame('слов', $sheet0->getCell('A3')->getCalculatedValue());
$names = $robj->getDefinedNames();
self::assertCount(1, $names);
// name has been uppercased
$namedRange = $names['СЛСЛ'] ?? null;
self::assertInstanceOf(NamedRange::class, $namedRange);
self::assertSame('$B$1', $namedRange->getRange());

$robj->disconnectWorksheets();
}
}

0 comments on commit ac34f58

Please sign in to comment.