/** * Copyright 2019-2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef GE_OMM_CSA_INTERACT_H_ #define GE_OMM_CSA_INTERACT_H_ #include #include "framework/common/ge_inner_error_codes.h" namespace ge { enum JobState { JOBSTATE_WAITING = 1, JOBSTATE_RUNNING, JOBSTATE_KILLING, JOBSTATE_SUCCEED, JOBSTATE_FAILED, JOBSTATE_KILLED, JOBSTATE_UNKOWN }; enum JobSubState { JOBSUBSTATE_ENV_INIT = 201, JOBSUBSTATE_ENV_FIN, JOBSUBSTATE_RESOUCE_ALLOC, JOBSUBSTATE_MODEL_COMPILE, JOBSUBSTATE_GRAPH_PREPARE, JOBSUBSTATE_GRAPH_SPLIT, JOBSUBSTATE_GRAPH_OPTIMIZE, JOBSUBSTATE_GRAPH_BUILD, JOBSUBSTATE_GRAPH_LOAD, JOBSUBSTATE_GRAPH_EXEC, JOBSUBSTATE_GRAPH_UNLOAD, JOBSUBSTATE_OTHER }; enum ErrorModule { ERROR_MODULE_DRIVER = 0x01, ERROR_MODULE_RUNTIME = 0x04, ERROR_MODULE_CCE = 0x06, ERROR_MODULE_FMK = 0x08, ERROR_MODULE_HCCL = 0x12 }; struct CsaErrorCode { CsaErrorCode() : module_ret_errcode(0), error_module(ERROR_MODULE_FMK), job_sub_state(JOBSUBSTATE_OTHER) {} ~CsaErrorCode() {} uint32_t module_ret_errcode; ErrorModule error_module; JobSubState job_sub_state; }; class CsaInteract { public: /// /// @brief Obtain CsaInteract instance /// @return CsaInteract instance /// static CsaInteract& GetInstance(); /// /// @brief CsaInteract instance initialization /// @param [in] dev_index device index /// @param [in] job_id job id /// @return void /// void Init(int32_t dev_index, int64_t job_id); /// /// @brief Update job state file /// @param [in] job_state job state /// @param [in] job_sub_state detailed job state /// @param [in] module_ret_errcode sub module training failure error code /// @param [in] error_module error module identified by FMK /// @return Status /// Status WriteJobState(JobState job_state, JobSubState job_sub_state = JOBSUBSTATE_OTHER, uint32_t module_ret_errcode = SUCCESS, ErrorModule error_module = ERROR_MODULE_FMK); /// /// @brief Update error code in the job state file /// @param [in] module_ret_errcode sub module training failure error code /// @param [in] error_module error module identified by FMK /// @param [in] job_sub_state detailed job state /// @return void /// void WriteErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, JobSubState job_sub_state); /// /// @brief Record errors that occurred durning the training /// @param [in] module_ret_errcode sub module training failure error code /// @param [in] error_module error module identified by FMK /// @param [in] job_sub_state detailed job state /// @return void /// void StoreInternalErrorCode(uint32_t module_ret_errcode, ErrorModule error_module, JobSubState job_sub_state); /// /// @brief Update training error code in the job state file /// @return void /// void WriteInternalErrorCode(); /// /// @brief Update network connectivity detect file /// @param [in] content network connectivity content /// @return Status /// Status WriteHcomDetection(const std::string& content); private: CsaInteract() : dev_index_(0), job_id_(0), is_init_(false), curr_state_(JOBSTATE_UNKOWN), is_have_internal_error_(false) {} ~CsaInteract() {} CsaInteract(const CsaInteract&) = delete; CsaInteract(CsaInteract&&) = delete; CsaInteract& operator=(const CsaInteract&) = delete; CsaInteract& operator=(CsaInteract&&) = delete; /// /// @ingroup WriteFile /// @brief Write the content into the file. If the file does not exist, create the file /// @param [in] file_name: File name to be written /// @param [in] content: Contents to be written /// @return Status /// Status WriteFile(const std::string& file_name, const std::string& content); /// /// @ingroup MakePath /// @brief Verify whether the file path exists, if not, recursively create the folder /// @param [in] file_name: File name to be verified /// @return Status /// Status MakePath(const std::string& file_name); // device index int32_t dev_index_; // job id int64_t job_id_; // is initialization complete bool is_init_; // current job state JobState curr_state_; // job state file std::string job_state_file_; // network connectivity detect file std::string hcom_detect_file_; // identification of internal errors that occurred during the training bool is_have_internal_error_; // error code information CsaErrorCode csa_error_code_; }; } // namespace ge #endif // GE_OMM_CSA_INTERACT_H_