Подготовка окружения


Делаю:
24.08.2022


GitHub:
https://github.com/webmakaka/Kubeflow-for-Machine-Learning-From-Lab-to-Production.git


Инсталляция KFCTL


$ export \
    PROFILE=marley-minikube \
    CPUS=4 \
    MEMORY=15G \
    HDD=30G \
    DRIVER=docker \
    KUBERNETES_VERSION=v1.16.0


$ {
    minikube --profile ${PROFILE} config set memory ${MEMORY}
    minikube --profile ${PROFILE} config set cpus ${CPUS}
    minikube --profile ${PROFILE} config set disk-size ${HDD}

    minikube --profile ${PROFILE} config set driver ${DRIVER}

    minikube --profile ${PROFILE} config set kubernetes-version ${KUBERNETES_VERSION}
    minikube start --profile ${PROFILE} --embed-certs

    // Enable ingress
    minikube addons --profile ${PROFILE} enable ingress

    // Enable registry
    // minikube addons --profile ${PROFILE} enable registry
}


// При необходимости можно будет удалить профиль и все созданное в профиле следующей командой
// $ minikube --profile ${PROFILE} stop && minikube --profile ${PROFILE} delete


$ {
  export KUBEFLOW_PROJECT_NAME=marley-kubeflow \
  export KUBEFLOW_HOME=~/kubeflow/${KUBEFLOW_PROJECT_NAME}
  mkdir -p ${KUBEFLOW_HOME}
  cd ${KUBEFLOW_HOME}
}


// Если каталог не пуст
$ rm -rf *


// This deployment process can take up to 30 minutes.
$ kfctl apply -V -f https://raw.githubusercontent.com/kubeflow/manifests/v1.0-branch/kfdef/kfctl_k8s_istio.v1.0.1.yaml


// Не заработало
// Большое количество ошибок
// $ kfctl apply -V -f https://raw.githubusercontent.com/kubeflow/manifests/v1.2-branch/kfdef/kfctl_k8s_istio.v1.2.0.yaml


$ cd ~/tmp/
$ git clone https://github.com/webmakaka/Kubeflow-for-Machine-Learning-From-Lab-to-Production.git


$ cd ~/tmp/Kubeflow-for-Machine-Learning-From-Lab-to-Production/environment/roles/
$ kubectl apply -f ./


$ kubectl get pods -n kubeflow
NAME                                                          READY   STATUS             RESTARTS   AGE
admission-webhook-bootstrap-stateful-set-0                    1/1     Running            0          36m
admission-webhook-deployment-59bc556b94-6spfv                 0/1     Terminating        0          36m
admission-webhook-deployment-59bc556b94-vnjbn                 1/1     Running            0          35m
application-controller-stateful-set-0                         1/1     Running            0          39m
argo-ui-5f845464d7-fqrzf                                      1/1     Running            0          36m
centraldashboard-d5c6d6bf-drsz5                               1/1     Running            0          36m
jupyter-web-app-deployment-544b7d5684-sbbsd                   1/1     Running            0          36m
katib-controller-6b87947df8-twhf9                             1/1     Running            1          36m
katib-db-manager-54b64f99b-2mk5v                              1/1     Running            1          36m
katib-mysql-74747879d7-z2hgs                                  1/1     Running            0          36m
katib-ui-76f84754b6-twtlr                                     1/1     Running            0          36m
kfserving-controller-manager-0                                1/2     ImagePullBackOff   0          9m45s
metacontroller-0                                              1/1     Running            0          36m
metadata-db-79d6cf9d94-2c8fb                                  1/1     Running            0          36m
metadata-deployment-5dd4c9d4cf-6lcmb                          1/1     Running            0          36m
metadata-envoy-deployment-5b9f9466d9-wkgsg                    1/1     Running            0          36m
metadata-grpc-deployment-66cf7949ff-62kdl                     1/1     Running            3          36m
metadata-ui-8968fc7d9-9cwgm                                   1/1     Running            0          36m
minio-5dc88dd55c-tgxlt                                        1/1     Running            0          36m
ml-pipeline-55b669bf4d-qwcvh                                  1/1     Running            0          36m
ml-pipeline-ml-pipeline-visualizationserver-c489f5dd8-wkgx8   1/1     Running            0          36m
ml-pipeline-persistenceagent-f54b4dcf5-fhrkg                  1/1     Running            1          36m
ml-pipeline-scheduledworkflow-7f5d9d967b-hnmm9                1/1     Running            0          36m
ml-pipeline-ui-7bb97bf8d8-5kpbc                               1/1     Running            0          36m
ml-pipeline-viewer-controller-deployment-584cd7674b-ddb6l     1/1     Running            0          36m
mysql-66c5c7bf56-d6g9b                                        1/1     Running            0          36m
notebook-controller-deployment-576589db9d-6gfvw               1/1     Running            0          36m
profiles-deployment-874649f89-4wv27                           2/2     Running            0          36m
pytorch-operator-666dd4cd49-b6zgf                             1/1     Running            0          36m
seldon-controller-manager-5d96986d47-m5sjv                    1/1     Running            0          36m
spark-operatorcrd-cleanup-qgbj8                               0/2     Completed          0          36m
spark-operatorsparkoperator-7c484c6859-6hsvm                  1/1     Running            0          36m
spartakus-volunteer-7465bcbdc-4vcxl                           1/1     Running            0          36m
tensorboard-6549cd78c9-zxh42                                  1/1     Running            0          36m
tf-job-operator-7574b968b5-vr4kt                              1/1     Running            0          36m
workflow-controller-6db95548dd-7mtks                          1/1     Running


UI

$ kubectl port-forward svc/istio-ingressgateway -n istio-system 7777:80


localhost:7777


Namespace: anonymous


Argo

$ cd ~/tmp

# Download the binary
$ curl -sLO https://github.com/argoproj/argo-workflows/releases/download/v3.3.9/argo-linux-amd64.gz

# Unzip
$ gunzip argo-linux-amd64.gz

# Make binary executable
$ chmod +x argo-linux-amd64

# Move binary to path
$ sudo mv ./argo-linux-amd64 /usr/local/bin/argo

# Test installation
$ argo version
argo: v3.3.9


$ cd ~/tmp/Kubeflow-for-Machine-Learning-From-Lab-to-Production/environment/argocd/
$ kubectl apply -f ./


$ minikube ip --profile ${PROFILE}
192.168.49.2


$ kubectl get svc -n kubeflow | grep argo
argo-ui                                        NodePort    10.111.86.95     <none>        80:31461/TCP        162m


// OK!
192.168.49.2:31461/argo


// Потом будет такой вывод
$ argo list -n kubeflow
NAME                               STATUS      AGE   DURATION   PRIORITY
recommender-model-update-h8bvn     Failed      4m    3s         0
recommender-model-update-ffqbt     Failed      8m    24s        0
calculation-pipeline-9xhpc         Succeeded   20m   1m         0
simple-sci-kit-kf-pipeline-96b4x   Failed      24m   2m         0


$ argo logs recommender-model-update-ffqbt -n kubeflow


MINio

$ kubectl port-forward -n kubeflow svc/minio-service 9000:9000