在DLA中執行以下SQL,分別為OSS中的日誌文件webserver.log、ngnix_log.log、log4j_sample.log創建對應的表。
-
登錄DLA控制臺。
-
單擊左側導航欄的訪問點管理,然後單擊登錄DMS,執行以下SQL創建OSS Schema。
您也可以通過MySQL客戶端或者程序代碼等方式連接DLA,然後執行以下SQL創建OSS Schema。
-- Register an OSS-backed schema for the log files.
-- catalog  : marks the schema type as OSS.
-- location : OSS bucket directory that holds the files; must end with '/'.
CREATE SCHEMA oss_log_schema
WITH DBPROPERTIES (
    catalog = 'oss',
    location = 'oss://oss-bucket-name/log/'
);
-
catalog:指定創建的Schema類型為OSS。
-
location:文件所在的OSS Bucket目錄,需以 / 結尾。
步驟二:創建表
-
webserver.log
-- External table over webserver.log.
-- "input.regex" has nine capture groups, one per declared column, in order
-- (the last two, referer and agent, sit inside an optional group).
CREATE EXTERNAL TABLE webserver_log (
    host     STRING,
    identity STRING,
    userName STRING,
    time     STRING,
    request  STRING,
    status   STRING,
    size     INT,
    referer  STRING,
    agent    STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    "input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?"
)
STORED AS TEXTFILE
LOCATION 'oss://oss-bucket-name/log/webserver.log';
-
ngnix_log.log
-- External table over ngnix_log.log.
-- "input.regex" has ten capture groups, one per declared column, in order;
-- the trailing group feeds gzip_ratio.
-- LOCATION names the full file (ngnix_log.log), matching the file name used
-- elsewhere on this page and the LOCATION style of the sibling tables.
CREATE EXTERNAL TABLE ngnix_log (
    remote_address  STRING,
    identity        STRING,
    remote_user     STRING,
    time_local      STRING,
    request         STRING,
    status          STRING,
    body_bytes_sent INT,
    http_referer    STRING,
    http_user_agent STRING,
    gzip_ratio      STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    "input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))? ([^ \"]*|\"[^\"]*\")"
)
STORED AS TEXTFILE
LOCATION 'oss://oss-bucket-name/log/ngnix_log.log';
-
log4j_sample.log
-- External table over log4j_sample.log.
-- "input.regex" is anchored to the whole line and has five capture groups,
-- one per declared column: date, time, level, class, then the rest as details.
CREATE EXTERNAL TABLE log4j_log (
    date    STRING,
    time    STRING,
    level   STRING,
    class   STRING,
    details STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    "input.regex" = "^(\\d{4}-\\d{2}-\\d{2})\\s+(\\d{2}.\\d{2}.\\d{2}.\\d{3})\\s+(\\S+)\\s+(\\S+)\\s+(.*)$"
)
STORED AS TEXTFILE
LOCATION 'oss://oss-bucket-name/log/log4j_sample.log';
步驟三:讀取日誌文件數據
表創建成功後,您可以在DLA中通過SELECT查詢並分析OSS日誌文件數據,幫助定位故障原因。
-
log4j_sample.log
-- Return every parsed row of the log4j table (all columns, for inspection).
SELECT *
FROM oss_log_schema.log4j_log;
-
ngnix_log
-- Return every parsed row of the ngnix table (all columns, for inspection).
SELECT *
FROM oss_log_schema.ngnix_log;
-
webserver_log
-- Return every parsed row of the webserver table (all columns, for inspection).
SELECT *
FROM oss_log_schema.webserver_log;