taosdata / TDengine

High-performance, scalable time-series database designed for Industrial IoT (IIoT) scenarios
https://tdengine.com
GNU Affero General Public License v3.0
23.39k stars 4.86k forks source link

insert data failed in batch #17060

Closed lw-leo closed 6 months ago

lw-leo commented 2 years ago

描述 向多个表插入记录失败,调整sql插入语句顺序后成功

复现环境

一、测试环境

taosd -V
community version: 3.0.1.2 compatible_version: 3.0.0.0
gitinfo: 287796b014d77d4127132c5aeb88453005dcb092
buildInfo: Built at 2022-09-22 15:33:26

taos -V
version: 3.0.1.2

uname -a
Linux leo 5.4.0-125-generic #141-Ubuntu SMP Wed Aug 10 13:42:03 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux

二、表结构

taos> describe remote_control_db.rc_00000;
 ts                             | TIMESTAMP            |           8 |          |
 h_version                      | VARCHAR              |          64 |          |
 s_version                      | VARCHAR              |          64 |          |
 use_count                      | SMALLINT UNSIGNED    |           2 |          |
 ltp                            | TINYINT UNSIGNED     |           1 |          |
 rtp                            | TINYINT UNSIGNED     |           1 |          |
 failure                        | BIGINT UNSIGNED      |           8 |          |
 sn                             | VARCHAR              |          64 | TAG      |

taos> describe `sby_db`.`sby_00000`;
             field              |         type         |   length    |   note   |
=================================================================================
 ts                             | TIMESTAMP            |           8 |          |
 version                        | VARCHAR              |          64 |          |
 voltage                        | FLOAT                |           4 |          |
 output_power                   | FLOAT                |           4 |          |
 failure                        | BIGINT UNSIGNED      |           8 |          |
 sn                             | VARCHAR              |          64 | TAG      |
Query OK, 6 rows in database (0.001523s)

taos> describe `joystick_db`.`joy_00000`;
             field              |         type         |   length    |   note   |
=================================================================================
 ts                             | TIMESTAMP            |           8 |          |
 type                           | TINYINT UNSIGNED     |           1 |          |
 h_version                      | VARCHAR              |          64 |          |
 s_version                      | VARCHAR              |          64 |          |
 use_count                      | INT UNSIGNED         |           4 |          |
 x                              | FLOAT                |           4 |          |
 y                              | FLOAT                |           4 |          |
 z                              | FLOAT                |           4 |          |
 failure                        | BIGINT UNSIGNED      |           8 |          |
 sn                             | VARCHAR              |          64 | TAG      |

三、失败插入sql

insert into 
   `sby_db`.`sby_00000` values ( '2022-09-22T15:17:34.210590488+08:00', 'sby_v_0001', 1.000000, 1.000000, 1 ) 
   `joystick_db`.`joy_00000` values ( '2022-09-22T15:17:34.210596098+08:00', 1 ,  'J_H_0001', 'S_H_0001', 1, 1.000000, 1.000000, 1.000000, 1 ) 
   `remote_control_db`.`rc_00000` values ( '2022-09-22T15:17:34.210598197+08:00', 'H_0001', 'S_0001', 1, 1, 1, 1 );

DB error: syntax error near 'S_H_0001' (invalid unsigned smallint data) (0.003503s)

四、成功插入sql

insert into 
   `remote_control_db`.`rc_00000` values ( '2022-09-22T15:17:34.210598197+08:00', 'H_0001', 'S_0001', 1, 1, 1, 1 )
   `sby_db`.`sby_00000` values ( '2022-09-22T15:17:34.210590488+08:00', 'sby_v_0001', 1.000000, 1.000000, 1 ) 
   `joystick_db`.`joy_00000` values ( '2022-09-22T15:17:34.210596098+08:00', 1 ,  'J_H_0001', 'S_H_0001', 1, 1.000000, 1.000000, 1.000000, 1 );

BUG分析 分析源码后基本锁定问题所在(自认为)

source/libs/parser/src/parUtil.c

int32_t reserveTableMetaInCacheForInsert(const SName* pName, ECatalogReqType reqType, int32_t tableNo,
                                         SParseMetaCache* pMetaCache) {
  ···
  pMetaCache->sqlTableNum = tableNo; // tableNo是索引,赋值给Num导致sqlTableNum会比实际长度小1  
  ···    
}

static int32_t buildCatalogReqForInsert(SParseContext* pCxt, const SParseMetaCache* pMetaCache,
                                        SCatalogReq* pCatalogReq) {
    ...

        pCxt->pTableMetaPos = taosArrayInit(pMetaCache->sqlTableNum, sizeof(int32_t));   
    pCxt->pTableVgroupPos = taosArrayInit(pMetaCache->sqlTableNum, sizeof(int32_t));

    SInsertTablesMetaReq* p = taosHashIterate(pMetaCache->pInsertTables, NULL);
    while (NULL != p) {
     ...    
        for (int32_t i = 0; i < ntables; ++i) {
            taosArrayInsert(pCxt->pTableMetaPos, *(int32_t*)taosArrayGet(p->pTableMetaPos, i), &metaReqNo);     // BUG 
            ++metaReqNo;
        }
       for (int32_t i = 0; i < ntables; ++i) {
            taosArrayInsert(pCxt->pTableVgroupPos, *(int32_t*)taosArrayGet(p->pTableVgroupPos, i), &vgroupReqNo); // BUG
            ++vgroupReqNo;
       }
     ...
     p = taosHashIterate(pMetaCache->pInsertTables, p);
   }
   ...
}

source/util/src/tarray.c 

void* taosArrayInsert(SArray* pArray, size_t index, void* pData) {
  if (pArray == NULL || pData == NULL) {
    return NULL;
  }

  if (index >= pArray->size) {
    return taosArrayPush(pArray, pData);
  }

  if (pArray->size >= pArray->capacity) {
    int32_t ret = taosArrayResize(pArray);

    if (ret < 0) {
      return NULL;
    }
  }

  void* dst = TARRAY_GET_ELEM(pArray, index);

  int32_t remain = (int32_t)(pArray->size - index);
  memmove((char*)dst + pArray->elemSize, (char*)dst, pArray->elemSize * remain);
  memcpy(dst, pData, pArray->elemSize);

  pArray->size += 1;

  return dst;
}

void taosArraySet(SArray* pArray, size_t index, void* pData) {
  assert(index < pArray->size);
  memcpy(TARRAY_GET_ELEM(pArray, index), pData, pArray->elemSize);
}

测试发现 (int32_t)taosArrayGet(p->pTableMetaPos, i) 出来的结果是无序的taosArrayInsert在执行插入时会copy插入位置之后的数据,导致pCxt->pTableMetaPos存储的value值发生了变化最终导致分析sql时读取表信息是出现错位的情况。

将taosArrayInsert修改为taosArraySet (需要在taosArrayInit后设置size,需要在sqlTableNum基础上加1)后问题不在,项目中还有一些地方使用了taosArrayInsert,不知道是否还会有这种类似的错误使用场景。

(注: 如有理解不对的地方还请原谅并指出)

yu285 commented 2 years ago

感谢提出,我会看下

yu285 commented 2 years ago

可以提供下建表语句不

lw-leo commented 2 years ago

@yu285 创建库和超级表以及子表

create database if not exists `remote_control_db`;
create database if not exists `joystick_db`;
create database if not exists `sby_db`;

create stable if not exists `remote_control_db`.`remote_control_st` (
            ts              timestamp       ,
            h_version       binary(64)          ,
            s_version       binary(64)          ,
            use_count       smallint unsigned   ,
            ltp             tinyint unsigned    ,
            rtp             tinyint unsigned    ,
            failure         bigint unsigned      
        ) tags (
            sn binary(64)
        );

create stable if not exists `joystick_db`.`joystick_st` (
            ts         timestamp        ,
            type        tinyint unsigned       ,
            h_version   binary(64)      ,
            s_version   binary(64)      ,
            use_count   int unsigned    ,
            x           float           ,
            y           float           ,
            z           float           ,
            failure     bigint unsigned  
        ) tags (
            sn binary(64)
        );

create stable if not exists `sby_db`.`sby_st` (
            ts          timestamp   ,
            version         binary(64)      ,
            voltage         float       ,
            output_power    float   ,
            failure         bigint unsigned  
        ) tags (
            sn binary(64)
        );

create table if not exists `sby_db`.`sby_00000` using `sby_db`.`sby_st` tags ('ID_00000');
create table if not exists `joystick_db`.`joy_00000` using `joystick_db`.`joystick_st` tags ('ID_00000');
create table if not exists `remote_control_db`.`rc_00000` using `remote_control_db`.`remote_control_st` tags ('ID_00000');

可以提供下建表语句不

yu285 commented 6 months ago

3.0早期版本的bug 都已经都修复了,但是3.0.2.5 之前可能与现在的最新版(3.3.0.0)并不兼容,所以无法直接升级。因此建议重新部署一下最新的 3.3.0.0 ,后续升级都很方便,直接安装最新版软件即可。

开源版支持操作系统范围:https://docs.taosdata.com/reference/support-platform/

如果还有类似问题可以加微信 a15652223354