# --- Data preparation (IPython shell commands; run inside a notebook) ---
# Fetch MXNet's im2rec.py tool, which converts image folders into RecordIO files.
!wget https://raw.githubusercontent.com/apache/incubator-mxnet/master/tools/im2rec.py ./
# im2rec.py depends on the mxnet package.
!pip install mxnet
# Download the pneumonia chest X-ray dataset from S3 into ./data/.
!aws s3 cp --recursive s3://easy-ml-pocs/pneumonia-image-data/ ./data/
# First pass: generate .lst index files with a 70/30 train/test split
# (class labels are derived from the subdirectory names via --recursive).
!python im2rec.py ./data_rec ./data/ --recursive --list --num-thread 8 --test-ratio=0.3 --train-ratio=0.7
!ls
# Second pass: pack the listed images into .rec (RecordIO) files.
# --pass-through keeps the original image bytes; --pack-label embeds labels.
!python im2rec.py ./data_rec ./data/ --recursive --pass-through --pack-label --num-thread 8
# Line count = number of training samples (feeds num_training_samples below).
!wc -l data_rec_train.lst
import sagemaker
# Upload the packed RecordIO train/test files to S3; the training job's input
# channels below point at the returned S3 URIs.
sess = sagemaker.Session()
trainpath = sess.upload_data(
path='data_rec_train.rec', bucket='easy-ml-pocs',
key_prefix='sagemaker/pneumonia-image-data/input')
testpath = sess.upload_data(
path='data_rec_test.rec', bucket='easy-ml-pocs',
key_prefix='sagemaker/pneumonia-image-data/input')
# Bare expressions: in a notebook these display the uploaded S3 URIs.
trainpath
testpath
# Hyperparameters for the SageMaker built-in image-classification algorithm.
# NOTE: all values are kept as strings because the CreateTrainingJob API
# requires string-valued hyperparameters.
# The algorithm supports multiple network depth (number of layers). They are 18, 34, 50, 101, 152 and 200
# For this training, we will use 18 layers
num_layers = "18"
# we need to specify the input image shape for the training data
# as "channels,height,width" — here 3-channel 1000x1000 images
image_shape = "3,1000,1000"
# we also need to specify the number of training samples in the training set
# (should match the line count of data_rec_train.lst from `wc -l` above — verify)
num_training_samples = "1358"
# specify the number of output classes (normal vs. pneumonia)
num_classes = "2"
# batch size for training
mini_batch_size = "64"
# number of epochs
epochs = "2"
# learning rate
learning_rate = "0.01"
%%time
import time
import boto3
from time import gmtime, strftime
# get_image_uri is the SageMaker SDK v1 helper that resolves the ECR image of a
# built-in algorithm (superseded by sagemaker.image_uris.retrieve in SDK v2).
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker import get_execution_role
# IAM role the training job assumes to read/write S3.
role = get_execution_role()
bucket='easy-ml-pocs' # customize to your bucket
# ECR URI of the built-in image-classification container for this region.
training_image = get_image_uri(boto3.Session().region_name, 'image-classification')
# NOTE(review): this S3 client appears unused in the rest of the script.
s3 = boto3.client('s3')
# create unique job name (timestamp suffix guarantees uniqueness per run)
job_name = 'DEMO-imageclassification-' + time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
# Request body for the low-level CreateTrainingJob API call below.
training_params = \
{
# specify the training docker image
"AlgorithmSpecification": {
"TrainingImage": training_image,
# "File" mode downloads the full dataset to the instance before training.
"TrainingInputMode": "File"
},
"RoleArn": role,
"OutputDataConfig": {
"S3OutputPath": 's3://{}/sagemaker/{}/output'.format(bucket, 'DEMO-imageclassification-')
},
# NOTE(review): ml.p3.16xlarge is an 8-GPU instance; confirm this is
# intentional (and budgeted) for a 2-epoch demo run.
"ResourceConfig": {
"InstanceCount": 1,
"InstanceType": "ml.p3.16xlarge",
"VolumeSizeInGB": 50
},
"TrainingJobName": job_name,
# The API requires every hyperparameter value to be a string; the str()
# wrapping is a no-op here since the values above are already strings.
"HyperParameters": {
"image_shape": image_shape,
"num_layers": str(num_layers),
"num_training_samples": str(num_training_samples),
"num_classes": str(num_classes),
"mini_batch_size": str(mini_batch_size),
"epochs": str(epochs),
"learning_rate": str(learning_rate)
},
# Hard cap: the job is stopped after 100 hours regardless of progress.
"StoppingCondition": {
"MaxRuntimeInSeconds": 360000
},
#Training data should be inside a subdirectory called "train"
#Validation data should be inside a subdirectory called "validation"
#The algorithm currently only supports fullyreplicated model (where data is copied onto each machine)
"InputDataConfig": [
{
"ChannelName": "train",
"DataSource": {
"S3DataSource": {
"S3DataType": "S3Prefix",
"S3Uri": trainpath,
"S3DataDistributionType": "FullyReplicated"
}
},
# RecordIO content type matches the .rec files produced by im2rec.py.
"ContentType": "application/x-recordio",
"CompressionType": "None"
},
{
# NOTE(review): the 30% test split is used here as the validation channel.
"ChannelName": "validation",
"DataSource": {
"S3DataSource": {
"S3DataType": "S3Prefix",
"S3Uri": testpath,
"S3DataDistributionType": "FullyReplicated"
}
},
"ContentType": "application/x-recordio",
"CompressionType": "None"
}
]
}
print('Training job name: {}'.format(job_name))
print('\nInput Data Location: {}'.format(training_params['InputDataConfig'][0]['DataSource']['S3DataSource']))

# Create the Amazon SageMaker training job.
# NOTE: this rebinds the name `sagemaker` from the SDK module (imported at the
# top of the script) to a boto3 client; later cells depend on this rebinding.
sagemaker = boto3.client(service_name='sagemaker')
sagemaker.create_training_job(**training_params)

# Confirm that the training job has started.
status = sagemaker.describe_training_job(TrainingJobName=job_name)['TrainingJobStatus']
print('Training job current status: {}'.format(status))

try:
    # Block until the job reaches a terminal state, then report it.
    # The waiter returns normally for Completed/Stopped/Failed terminal states
    # and raises (e.g. WaiterError) on timeout or API errors.
    sagemaker.get_waiter('training_job_completed_or_stopped').wait(TrainingJobName=job_name)
    training_info = sagemaker.describe_training_job(TrainingJobName=job_name)
    status = training_info['TrainingJobStatus']
    print("Training job ended with status: " + status)
    if status == 'Failed':
        # Surface the failure reason even when the waiter itself did not raise.
        print('Training failed with the following error: {}'.format(
            training_info.get('FailureReason', 'unknown')))
except Exception as err:  # was a bare `except:` — never swallow KeyboardInterrupt/SystemExit
    print('Waiting for the training job failed: {}'.format(err))
    # Query the job once more to report the service-side failure reason.
    message = sagemaker.describe_training_job(TrainingJobName=job_name)['FailureReason']
    print('Training failed with the following error: {}'.format(message))
import boto3
from time import gmtime, strftime
# Fresh SageMaker client under a distinct name (`sage`) — the name `sagemaker`
# was rebound to a client earlier in the script.
sage = boto3.Session().client(service_name='sagemaker')
# NOTE(review): relies on `import time` from an earlier cell; the double dash
# in the model name looks like a typo but is harmless (it is just a label).
model_name="DEMO--classification-model" + time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
print(model_name)
# Locate the model artifacts produced by the training job above.
info = sage.describe_training_job(TrainingJobName=job_name)
model_data = info['ModelArtifacts']['S3ModelArtifacts']
print(model_data)
# Hosting uses the same built-in image-classification container as training.
hosting_image = get_image_uri(boto3.Session().region_name, 'image-classification')
primary_container = {
'Image': hosting_image,
'ModelDataUrl': model_data,
}
# Register the model (container image + artifact location) with SageMaker.
create_model_response = sage.create_model(
ModelName = model_name,
ExecutionRoleArn = role,
PrimaryContainer = primary_container)
print(create_model_response['ModelArn'])
from time import gmtime, strftime
# Endpoint configuration: a single production variant serving the model on one
# CPU instance (inference does not need the GPU instance used for training).
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
endpoint_config_name = 'DEMO-imageclassification' + '-epc-' + timestamp
endpoint_config_response = sage.create_endpoint_config(
EndpointConfigName = endpoint_config_name,
ProductionVariants=[{
'InstanceType':'ml.m4.xlarge',
'InitialInstanceCount':1,
'ModelName':model_name,
'VariantName':'AllTraffic'}])
print('Endpoint configuration name: {}'.format(endpoint_config_name))
print('Endpoint configuration arn: {}'.format(endpoint_config_response['EndpointConfigArn']))
import time

# Create the HTTPS endpoint from the endpoint configuration defined above.
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
endpoint_name = 'DEMO-imageclassification' + '-ep-' + timestamp
print('Endpoint name: {}'.format(endpoint_name))

endpoint_params = {
    'EndpointName': endpoint_name,
    'EndpointConfigName': endpoint_config_name,
}
# NOTE: `sagemaker` here is the boto3 client bound earlier, not the SDK module.
endpoint_response = sagemaker.create_endpoint(**endpoint_params)
print('EndpointArn = {}'.format(endpoint_response['EndpointArn']))

# Report the initial status of the endpoint (typically 'Creating').
response = sagemaker.describe_endpoint(EndpointName=endpoint_name)
status = response['EndpointStatus']
print('EndpointStatus = {}'.format(status))

# Block until the endpoint reaches a terminal state (this can take minutes).
sagemaker.get_waiter('endpoint_in_service').wait(EndpointName=endpoint_name)

# Re-check and fail loudly if the endpoint did not come up healthy.
endpoint_response = sagemaker.describe_endpoint(EndpointName=endpoint_name)
status = endpoint_response['EndpointStatus']
print('Endpoint creation ended with EndpointStatus = {}'.format(status))
if status != 'InService':
    raise Exception('Endpoint creation failed.')
import boto3
runtime = boto3.Session().client(service_name='runtime.sagemaker')
file_name = '/tmp/test.jpg'
!aws s3 cp s3://easy-ml-pocs/pneumonia-image-data/pneumonia/person1096_bacteria_3037.jpeg {file_name}
from IPython.display import Image
Image(file_name)
import json
import numpy as np
with open(file_name, 'rb') as f:
payload = f.read()
payload = bytearray(payload)
response = runtime.invoke_endpoint(EndpointName=endpoint_name,
ContentType='application/x-image',
Body=payload)
result = response['Body'].read()
# result will be in json format and convert it to ndarray
result = json.loads(result)
result
index = np.argmax(result)
object_categories = ['normal','pneumonia']
print("Result: label - " + object_categories[index] + ", probability - " + str(result[index]))