Compare commits

...

1 Commits

Author SHA1 Message Date
skjnldsv 59871af530 fix(systemtags): remove duplicates, prevent and sanitize existing tags
Signed-off-by: skjnldsv <skjnldsv@protonmail.com>
2025-11-06 08:12:23 +01:00
12 changed files with 481 additions and 3 deletions
+1 -1
View File
@@ -162,7 +162,7 @@ class SystemTagPlugin extends \Sabre\DAV\ServerPlugin {
throw new BadRequest('Missing "name" attribute');
}
$tagName = $data['name'];
$tagName = Util::sanitizeWordsAndEmojis($data['name']);
$userVisible = true;
$userAssignable = true;
@@ -130,6 +130,7 @@ return array(
'OCA\\Settings\\SetupChecks\\PushService' => $baseDir . '/../lib/SetupChecks/PushService.php',
'OCA\\Settings\\SetupChecks\\RandomnessSecure' => $baseDir . '/../lib/SetupChecks/RandomnessSecure.php',
'OCA\\Settings\\SetupChecks\\ReadOnlyConfig' => $baseDir . '/../lib/SetupChecks/ReadOnlyConfig.php',
'OCA\\Settings\\SetupChecks\\RepairSanitizeSystemTagsAvailable' => $baseDir . '/../lib/SetupChecks/RepairSanitizeSystemTagsAvailable.php',
'OCA\\Settings\\SetupChecks\\SchedulingTableSize' => $baseDir . '/../lib/SetupChecks/SchedulingTableSize.php',
'OCA\\Settings\\SetupChecks\\SecurityHeaders' => $baseDir . '/../lib/SetupChecks/SecurityHeaders.php',
'OCA\\Settings\\SetupChecks\\ServerIdConfig' => $baseDir . '/../lib/SetupChecks/ServerIdConfig.php',
@@ -145,6 +145,7 @@ class ComposerStaticInitSettings
'OCA\\Settings\\SetupChecks\\PushService' => __DIR__ . '/..' . '/../lib/SetupChecks/PushService.php',
'OCA\\Settings\\SetupChecks\\RandomnessSecure' => __DIR__ . '/..' . '/../lib/SetupChecks/RandomnessSecure.php',
'OCA\\Settings\\SetupChecks\\ReadOnlyConfig' => __DIR__ . '/..' . '/../lib/SetupChecks/ReadOnlyConfig.php',
'OCA\\Settings\\SetupChecks\\RepairSanitizeSystemTagsAvailable' => __DIR__ . '/..' . '/../lib/SetupChecks/RepairSanitizeSystemTagsAvailable.php',
'OCA\\Settings\\SetupChecks\\SchedulingTableSize' => __DIR__ . '/..' . '/../lib/SetupChecks/SchedulingTableSize.php',
'OCA\\Settings\\SetupChecks\\SecurityHeaders' => __DIR__ . '/..' . '/../lib/SetupChecks/SecurityHeaders.php',
'OCA\\Settings\\SetupChecks\\ServerIdConfig' => __DIR__ . '/..' . '/../lib/SetupChecks/ServerIdConfig.php',
@@ -67,6 +67,7 @@ use OCA\Settings\SetupChecks\PhpOutputBuffering;
use OCA\Settings\SetupChecks\PushService;
use OCA\Settings\SetupChecks\RandomnessSecure;
use OCA\Settings\SetupChecks\ReadOnlyConfig;
use OCA\Settings\SetupChecks\RepairSanitizeSystemTagsAvailable;
use OCA\Settings\SetupChecks\SchedulingTableSize;
use OCA\Settings\SetupChecks\SecurityHeaders;
use OCA\Settings\SetupChecks\ServerIdConfig;
@@ -207,6 +208,7 @@ class Application extends App implements IBootstrap {
$context->registerSetupCheck(PhpOutputBuffering::class);
$context->registerSetupCheck(RandomnessSecure::class);
$context->registerSetupCheck(ReadOnlyConfig::class);
$context->registerSetupCheck(RepairSanitizeSystemTagsAvailable::class);
$context->registerSetupCheck(SecurityHeaders::class);
$context->registerSetupCheck(ServerIdConfig::class);
$context->registerSetupCheck(SchedulingTableSize::class);
@@ -0,0 +1,41 @@
<?php
declare(strict_types=1);
/**
* SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
namespace OCA\Settings\SetupChecks;
use OC\Repair\RepairSanitizeSystemTags;
use OCP\IL10N;
use OCP\SetupCheck\ISetupCheck;
use OCP\SetupCheck\SetupResult;
class RepairSanitizeSystemTagsAvailable implements ISetupCheck {
public function __construct(
private RepairSanitizeSystemTags $repairSanitizeSystemTags,
private IL10N $l10n,
) {
}
public function getCategory(): string {
return 'system';
}
public function getName(): string {
return $this->l10n->t('Sanitize and merge duplicate system tags available');
}
public function run(): SetupResult {
if ($this->repairSanitizeSystemTags->migrationsAvailable()) {
return SetupResult::warning(
$this->l10n->t('One or more system tags need to be sanitized or merged. This can take a long time on larger instances so this is not done automatically during upgrades. Use the command `occ maintenance:repair --include-expensive` to perform the migrations.'),
);
} else {
return SetupResult::success('None');
}
}
}
@@ -2007,6 +2007,7 @@ return array(
'OC\\Repair\\RepairInvalidShares' => $baseDir . '/lib/private/Repair/RepairInvalidShares.php',
'OC\\Repair\\RepairLogoDimension' => $baseDir . '/lib/private/Repair/RepairLogoDimension.php',
'OC\\Repair\\RepairMimeTypes' => $baseDir . '/lib/private/Repair/RepairMimeTypes.php',
'OC\\Repair\\RepairSanitizeSystemTags' => $baseDir . '/lib/private/Repair/RepairSanitizeSystemTags.php',
'OC\\RichObjectStrings\\RichTextFormatter' => $baseDir . '/lib/private/RichObjectStrings/RichTextFormatter.php',
'OC\\RichObjectStrings\\Validator' => $baseDir . '/lib/private/RichObjectStrings/Validator.php',
'OC\\Route\\CachingRouter' => $baseDir . '/lib/private/Route/CachingRouter.php',
@@ -2048,6 +2048,7 @@ class ComposerStaticInit749170dad3f5e7f9ca158f5a9f04f6a2
'OC\\Repair\\RepairInvalidShares' => __DIR__ . '/../../..' . '/lib/private/Repair/RepairInvalidShares.php',
'OC\\Repair\\RepairLogoDimension' => __DIR__ . '/../../..' . '/lib/private/Repair/RepairLogoDimension.php',
'OC\\Repair\\RepairMimeTypes' => __DIR__ . '/../../..' . '/lib/private/Repair/RepairMimeTypes.php',
'OC\\Repair\\RepairSanitizeSystemTags' => __DIR__ . '/../../..' . '/lib/private/Repair/RepairSanitizeSystemTags.php',
'OC\\RichObjectStrings\\RichTextFormatter' => __DIR__ . '/../../..' . '/lib/private/RichObjectStrings/RichTextFormatter.php',
'OC\\RichObjectStrings\\Validator' => __DIR__ . '/../../..' . '/lib/private/RichObjectStrings/Validator.php',
'OC\\Route\\CachingRouter' => __DIR__ . '/../../..' . '/lib/private/Route/CachingRouter.php',
+2
View File
@@ -57,6 +57,7 @@ use OC\Repair\RepairDavShares;
use OC\Repair\RepairInvalidShares;
use OC\Repair\RepairLogoDimension;
use OC\Repair\RepairMimeTypes;
use OC\Repair\RepairSanitizeSystemTags;
use OC\Template\JSCombiner;
use OCA\DAV\Migration\DeleteSchedulingObjects;
use OCA\DAV\Migration\RemoveObjectProperties;
@@ -221,6 +222,7 @@ class Repair implements IOutput {
),
\OCP\Server::get(DeleteSchedulingObjects::class),
\OC::$server->get(RemoveObjectProperties::class),
\OCP\Server::get(RepairSanitizeSystemTags::class),
];
}
@@ -0,0 +1,352 @@
<?php
/**
* SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-only
*/
declare(strict_types=1);
namespace OC\Repair;
use OC\Migration\NullOutput;
use OCP\DB\QueryBuilder\IQueryBuilder;
use OCP\IDBConnection;
use OCP\Migration\IOutput;
use OCP\Migration\IRepairStep;
use OCP\Util;
class RepairSanitizeSystemTags implements IRepairStep {
private bool $dryRun = false;
private int $changeCount = 0;
public function __construct(
protected IDBConnection $connection,
) {
}
public function getName(): string {
return 'Sanitize and merge duplicate system tags';
}
public function migrationsAvailable(): bool {
$this->dryRun = true;
$this->sanitizeAndMergeTags(new NullOutput());
$this->dryRun = false;
return $this->changeCount > 0;
}
public function run(IOutput $output): void {
$this->dryRun = false;
$this->sanitizeAndMergeTags($output);
}
private function sanitizeAndMergeTags(IOutput $output): void {
$output->info('Starting sanitization of system tags...');
$tags = $this->getAllTags();
// Group tags by sanitized name
$sanitizedMap = [];
$totalTags = 0;
foreach ($tags as $tag) {
$sanitizedMap[$tag['sanitizedName']][] = $tag;
$totalTags++;
}
$sanitizeCount = count($sanitizedMap);
$output->info("Found $totalTags tags with $sanitizeCount unique sanitized names.");
// Get object counts for all tags in one query
$objectCounts = [];
foreach ($this->getAllTagObjectCounts() as $tagId => $count) {
$objectCounts[$tagId] = $count;
}
// Process each sanitized name group
foreach ($sanitizedMap as $sanitizedName => $group) {
// Single tag, no duplicates found
if (count($group) === 1) {
$tag = $group[0];
if ($tag['originalName'] !== $sanitizedName) {
if (!$this->dryRun) {
$qb = $this->connection->getQueryBuilder();
$qb->update('systemtag')
->set('name', $qb->createNamedParameter($sanitizedName))
->where($qb->expr()->eq('id', $qb->createNamedParameter($tag['id'])))
->executeStatement();
}
$this->changeCount++;
$output->info("Sanitized tag ID {$tag['id']}: '{$tag['originalName']}' → '$sanitizedName'");
}
continue;
}
// Multiple tags with same sanitized name - merge them
$this->mergeTagGroup($group, $sanitizedName, $objectCounts, $output);
}
$output->info('System tag sanitization and merge completed.');
}
private function mergeTagGroup(array $group, string $sanitizedName, array $objectCounts, IOutput $output): void {
// Validate that all tags in the group have the same visibility and editable settings
$firstTag = $group[0];
$visibility = $firstTag['visibility'];
$editable = $firstTag['editable'];
foreach ($group as $tag) {
if ($tag['visibility'] !== $visibility || $tag['editable'] !== $editable) {
$output->warning(
"Cannot merge tag group '$sanitizedName': tags have different visibility or editable settings. "
. 'Manual verification required. Tag IDs: ' . implode(', ', array_column($group, 'id'))
);
return;
}
}
// Determine which tag to keep (most object mappings, then lowest ID as tiebreaker)
$keepTag = null;
$maxCount = -1;
foreach ($group as $tag) {
$count = $objectCounts[$tag['id']] ?? 0;
if ($count > $maxCount || ($count === $maxCount && ($keepTag === null || $tag['id'] < $keepTag['id']))) {
$maxCount = $count;
$keepTag = $tag;
}
}
$keepId = $keepTag['id'];
if ($keepTag === null) {
$output->warning("Cannot merge tag group '$sanitizedName': unable to determine which tag to keep");
return;
}
$duplicateIds = array_filter(array_column($group, 'id'), fn ($id) => $id !== $keepId);
if (empty($duplicateIds)) {
return;
}
if (!$this->dryRun) {
$this->connection->beginTransaction();
try {
// Step 1: Delete ALL mappings from duplicate tags that conflict with keepId
// This must happen FIRST before any updates to avoid unique constraint violations
$this->deleteConflictingMappings($duplicateIds, $keepId);
// Step 2: Update all remaining mappings from duplicates to keepId
// These won't conflict because we just deleted the conflicts
$qb = $this->connection->getQueryBuilder();
$qb->update('systemtag_object_mapping')
->set('systemtagid', $qb->createNamedParameter($keepId))
->where($qb->expr()->in('systemtagid', $qb->createNamedParameter($duplicateIds, IQueryBuilder::PARAM_INT_ARRAY)))
->executeStatement();
// Step 3: Delete duplicate tags in bulk (safe now that mappings are gone)
$qb = $this->connection->getQueryBuilder();
$qb->delete('systemtag')
->where($qb->expr()->in('id', $qb->createNamedParameter($duplicateIds, IQueryBuilder::PARAM_INT_ARRAY)))
->executeStatement();
// Step 4: Sanitize the kept tag name if needed
// This is safe because we've already deleted all duplicates with the same sanitized name
if ($keepTag['originalName'] !== $sanitizedName) {
$qb = $this->connection->getQueryBuilder();
$qb->update('systemtag')
->set('name', $qb->createNamedParameter($sanitizedName))
->where($qb->expr()->eq('id', $qb->createNamedParameter($keepId)))
->executeStatement();
}
$this->connection->commit();
} catch (\Exception $e) {
$this->connection->rollBack();
$output->warning("Failed to merge tag group '$sanitizedName': " . $e->getMessage());
return;
}
}
$this->changeCount += count($duplicateIds);
if ($keepTag['originalName'] !== $sanitizedName) {
$this->changeCount++;
}
$duplicateIdsList = implode(', ', $duplicateIds);
$output->info("Merged tags [$duplicateIdsList] into ID $keepId (sanitized: '$sanitizedName')");
}
/**
* Delete mappings from duplicate tags where the same object is already mapped to keepId
* This prevents unique constraint violations when updating systemtagid
*/
private function deleteConflictingMappings(array $duplicateIds, int $keepId): void {
$batchSize = 1000;
$batch = [];
// Stream keepId mappings and process in batches
$qb = $this->connection->getQueryBuilder();
$qb->select('objectid', 'objecttype')
->from('systemtag_object_mapping')
->where($qb->expr()->eq('systemtagid', $qb->createNamedParameter($keepId)));
$result = $qb->executeQuery();
while ($mapping = $result->fetch()) {
$batch[] = $mapping;
// When batch is full, delete conflicts for this batch
if (count($batch) >= $batchSize) {
$this->deleteBatchConflicts($batch, $duplicateIds);
$batch = []; // Clear batch
}
}
$result->closeCursor();
// Process remaining mappings in the last batch
if (!empty($batch)) {
$this->deleteBatchConflicts($batch, $duplicateIds);
}
}
/**
* Delete mappings in a batch that conflict with keepId mappings
*/
private function deleteBatchConflicts(array $batch, array $duplicateIds): void {
$qb = $this->connection->getQueryBuilder();
$qb->delete('systemtag_object_mapping')
->where($qb->expr()->in('systemtagid', $qb->createNamedParameter($duplicateIds, IQueryBuilder::PARAM_INT_ARRAY)));
$orX = $qb->expr()->orX();
foreach ($batch as $mapping) {
$orX->add($qb->expr()->andX(
$qb->expr()->eq('objectid', $qb->createNamedParameter($mapping['objectid'])),
$qb->expr()->eq('objecttype', $qb->createNamedParameter($mapping['objecttype']))
));
}
$qb->andWhere($orX);
$qb->executeStatement();
}
/**
* Check if a tag name already exists (excluding a specific tag ID)
*/
private function tagNameExists(string $name, int $excludeId): bool {
$qb = $this->connection->getQueryBuilder();
$qb->select('id')
->from('systemtag')
->where($qb->expr()->eq('name', $qb->createNamedParameter($name)))
->andWhere($qb->expr()->neq('id', $qb->createNamedParameter($excludeId)))
->setMaxResults(1);
$result = $qb->executeQuery();
$exists = $result->fetch() !== false;
$result->closeCursor();
return $exists;
}
// Fetch all tags in batches to avoid memory issues
private function getAllTags(int $offset = 0, ?int $limit = null): \Iterator {
$maxBatchSize = 1000;
do {
if ($limit !== null) {
$batchSize = min($limit, $maxBatchSize);
$limit -= $batchSize;
} else {
$batchSize = $maxBatchSize;
}
$tags = $this->getTags($batchSize, $offset);
$offset += $batchSize;
foreach ($tags as $tag) {
yield $tag;
}
} while (count($tags) === $batchSize && $limit !== 0);
}
// Fetch tags from the database
private function getTags($limit = null, $offset = null): array {
$qb = $this->connection->getQueryBuilder();
$qb->select('id', 'name', 'visibility', 'editable')
->from('systemtag')
->orderBy('name')
->addOrderBy('id');
$tags = [];
// Apply limit and offset if provided
if ($limit !== null) {
$qb->setMaxResults($limit);
}
if ($offset !== null) {
$qb->setFirstResult($offset);
}
// Fetch and return tags
$result = $qb->executeQuery();
while ($row = $result->fetch()) {
$tags[] = [
'id' => (int)$row['id'],
'originalName' => $row['name'],
'sanitizedName' => Util::sanitizeWordsAndEmojis($row['name']),
'visibility' => (int)$row['visibility'],
'editable' => (int)$row['editable'],
];
}
$result->closeCursor();
return $tags;
}
private function getAllTagObjectCounts(int $offset = 0, ?int $limit = null): \Iterator {
$maxBatchSize = 1000;
do {
if ($limit !== null) {
$batchSize = min($limit, $maxBatchSize);
$limit -= $batchSize;
} else {
$batchSize = $maxBatchSize;
}
$counts = $this->getTagObjectCounts($batchSize, $offset);
$offset += $batchSize;
foreach ($counts as $tagId => $count) {
yield $tagId => $count;
}
} while (count($counts) === $batchSize && $limit !== 0);
}
// Get object counts for all tags in one efficient query
private function getTagObjectCounts($limit = null, $offset = null): array {
$qb = $this->connection->getQueryBuilder();
$qb->select('systemtagid')
->selectAlias($qb->createFunction('COUNT(*)'), 'cnt')
->from('systemtag_object_mapping')
->groupBy('systemtagid');
$counts = [];
// Apply limit and offset if provided
if ($limit !== null) {
$qb->setMaxResults($limit);
}
if ($offset !== null) {
$qb->setFirstResult($offset);
}
// Fetch and return counts
$result = $qb->executeQuery();
while ($row = $result->fetch()) {
$counts[(int)$row['systemtagid']] = (int)$row['cnt'];
}
$result->closeCursor();
return $counts;
}
}
+5 -2
View File
@@ -23,6 +23,7 @@ use OCP\SystemTag\TagAlreadyExistsException;
use OCP\SystemTag\TagCreationForbiddenException;
use OCP\SystemTag\TagNotFoundException;
use OCP\SystemTag\TagUpdateForbiddenException;
use OCP\Util;
/**
* Manager class for system tags
@@ -156,6 +157,8 @@ class SystemTagManager implements ISystemTagManager {
throw new TagCreationForbiddenException();
}
$tagName = Util::sanitizeWordsAndEmojis($tagName);
// Check if tag already exists (case-insensitive)
$existingTags = $this->getAllTags(null, $tagName);
foreach ($existingTags as $existingTag) {
@@ -225,8 +228,9 @@ class SystemTagManager implements ISystemTagManager {
}
$beforeUpdate = array_shift($tags);
$newName = Util::sanitizeWordsAndEmojis($newName);
// Length of name column is 64
$newName = trim($newName);
$truncatedNewName = substr($newName, 0, 64);
$afterUpdate = new SystemTag(
$tagId,
@@ -451,5 +455,4 @@ class SystemTagManager implements ISystemTagManager {
return $groupIds;
}
}
+21
View File
@@ -662,4 +662,25 @@ class Util {
}
return true;
}
/**
* Sanitize a name by removing unwanted characters
*
* This function removes any character that is not a letter, space, or symbol (including emojis).
* It also normalizes spaces by replacing multiple consecutive spaces with a single space and trimming
* leading and trailing spaces.
*
* @param string $input The input string to sanitize
* @return string The sanitized string
* @since 33.0.0
*/
public static function sanitizeWordsAndEmojis(string $input): string {
// Remove control characters and other invisible separators, but keep everything else
$clean = preg_replace('/[\p{C}]+/u', '', $input);
// Normalize whitespace to single spaces
$clean = preg_replace('/[[:space:]]+/u', ' ', trim($clean));
return $clean;
}
}
+53
View File
@@ -416,4 +416,57 @@ class UtilTest extends \Test\TestCase {
$this->assertFalse($result);
}
public static function sanitizeProvider(): array {
return [
// Basic spaces and line controls
['Hello World', 'Hello World'],
[' Hello World ', 'Hello World'],
["Hello\t World \nAgain", 'Hello World Again'],
["Hello\rWorld", 'HelloWorld'],
["Hello\r\nWorld", 'HelloWorld'],
["Hello\u{200B}World", 'HelloWorld'], // zero-width space removed
["Hello\t\n\r World", 'Hello World'],
// Unicode, emoji, and CJK
['テスト 😃 💬', 'テスト 😃 💬'],
['中文測試 ✅', '中文測試 ✅'],
['Русский текст 😁', 'Русский текст 😁'],
['Café crème ☕', 'Café crème ☕'],
// Punctuation and filename-like
['Hello-World_123.', 'Hello-World_123.'],
['File.name, with commas', 'File.name, with commas'],
['Smile — dash', 'Smile — dash'],
['Invalid:/\\?%*|<>name', 'Invalid:/\\?%*|<>name'], // kept as is
['test@example.com', 'test@example.com'],
// Control and invisible chars
["Bad\0Name", 'BadName'],
["Hello\u{0007}World", 'HelloWorld'],
["Line\r\nbreaks", 'Linebreaks'],
["\x1F Hidden control", 'Hidden control'],
// Whitespace and normalization
[" Multiple spaces\t and \nnewlines ", 'Multiple spaces and newlines'],
["No-break\u{00A0}space", 'No-break space'], // NBSP normalized
["Zero\u{2003}width\u{2009}spaces", 'Zero width spaces'], // various spaces
// Complex mixes
['テスト 💬.png', 'テスト 💬.png'],
[' Mix 😎 emojis 🎉 and 123 numbers ', 'Mix 😎 emojis 🎉 and 123 numbers'],
["Hello \u{200B}\n World", 'Hello World'],
['Path ../etc/passwd', 'Path ../etc/passwd'],
['Symbols! @ # % ^ & * ( )', 'Symbols! @ # % ^ & * ( )'],
['Special chars <script>', 'Special chars <script>'],
];
}
/**
* @dataProvider sanitizeProvider
*/
public function testSanitizeWordsAndEmojis(string $input, string $expected): void {
$this->assertSame($expected, Util::sanitizeWordsAndEmojis($input));
}
}