Jump to: navigation, search

Difference between revisions of "Meteos/ExampleKmeans"

(Classify Users by Skill using Meteos)
(4. Parse a raw data)
Line 174: Line 174:
 
     "source_dataset_url": "swift://meteos/kmeans_data.txt",
 
     "source_dataset_url": "swift://meteos/kmeans_data.txt",
 
     "display_name": "sample-data",
 
     "display_name": "sample-data",
     "display_description": "user skill dataset",
+
     "display_description": "This is a sample dataset",
 
     "method": "parse",
 
     "method": "parse",
 
     "params": [{"method": "map", "args": "lambda l: l.split(',',1)[1]"}],
 
     "params": [{"method": "map", "args": "lambda l: l.split(',',1)[1]"}],
     "experiment_id": "91504a65-01cf-428f-81aa-596be7ca8619",
+
     "experiment_id": "fcc8d055-e801-4652-af8c-5aabedbf0286",
 
     "swift_tenant": "demo",
 
     "swift_tenant": "demo",
 
     "swift_username": "demo",
 
     "swift_username": "demo",
Line 188: Line 188:
 
| Property    | Value                                |
 
| Property    | Value                                |
 
+-------------+--------------------------------------+
 
+-------------+--------------------------------------+
| created_at  | 2016-12-05T04:38:01.000000          |
+
| created_at  | 2016-12-15T23:36:11.000000          |
| description | user skill dataset                   |
+
| description | This is a sample dataset             |
 
| head        | None                                |
 
| head        | None                                |
| id          | cfd6f339-5c94-4e1b-a718-11973fe36d95 |
+
| id          | b576d46c-01f7-4020-903b-eb79662e3faa |
 
| name        | sample-data                          |
 
| name        | sample-data                          |
| project_id  | a48633f159924753838dc062e070c539     |
+
| project_id  | 4034bb3cd9324776a68c140fdd18baa4     |
 
| status      | creating                            |
 
| status      | creating                            |
 
| stderr      | None                                |
 
| stderr      | None                                |
| user_id    | 46b1440477394fa3a1cbe64a9c50b192     |
+
| user_id    | 64ad04e101df40b4b51e5f370a83412c     |
 
+-------------+--------------------------------------+
 
+-------------+--------------------------------------+
 
</pre>
 
</pre>
 
<pre>
 
<pre>
 
$ meteos dataset-list
 
$ meteos dataset-list
+--------------------------------------+-------------+--------------------+-----------+--------------------------------+----------------------------+
+
+--------------------------------------+-------------+-----------+--------------------------------+
| id                                  | name       | description       | status    | source_dataset_url            | created_at                |
+
| id                                  | name        | status    | source_dataset_url            |
+--------------------------------------+-------------+--------------------+-----------+--------------------------------+----------------------------+
+
+--------------------------------------+-------------+-----------+--------------------------------+
| cfd6f339-5c94-4e1b-a718-11973fe36d95 | sample-data | user skill dataset | available | swift://meteos/kmeans_data.txt | 2016-12-05T04:38:01.000000 |
+
| b576d46c-01f7-4020-903b-eb79662e3faa | sample-data | available | swift://meteos/kmeans_data.txt |
+--------------------------------------+-------------+--------------------+-----------+--------------------------------+----------------------------+
+
+--------------------------------------+-------------+-----------+--------------------------------+
 
</pre>
 
</pre>
 
<pre>
 
<pre>
$ meteos dataset-show cfd6f339-5c94-4e1b-a718-11973fe36d95
+
$ meteos dataset-show b576d46c-01f7-4020-903b-eb79662e3faa
+-------------+-------------------------------------------------+
+
+-------------+--------------------------------------------------+
| Property    | Value                                           |
+
| Property    | Value                                           |
+-------------+-------------------------------------------------+
+
+-------------+--------------------------------------------------+
| created_at  | 2016-12-05T04:38:01.000000                     |
+
| created_at  | 2016-12-15T23:36:11.000000                       |
| description | user skill dataset                             |
+
| description | This is a sample dataset                         |
| head        | [u'1.0,1.0,0.0,0.0,0.0',                       |
+
| head        | [u'1.0,1.0,0.0,0.0,0.0', u'3.0,3.0,2.0,3.0,3.0', |
|            | u'3.0,3.0,2.0,3.0,3.0', u'5.0,5.0,5.0,4.0,5.0', |
+
|            | u'5.0,5.0,5.0,4.0,5.0', u'7.0,7.0,6.0,6.0,7.0', |
|            | u'7.0,7.0,6.0,6.0,7.0', u'7.0,8.0,7.0,8.0,8.0', |
+
|            | u'7.0,8.0,7.0,8.0,8.0', u'0.0,1.0,1.0,0.0,1.0', |
|            | u'0.0,1.0,1.0,0.0,1.0', u'3.0,2.0,2.0,2.0,3.0', |
+
|            | u'3.0,2.0,2.0,2.0,3.0', u'4.0,4.0,4.0,5.0,5.0', |
|            | u'4.0,4.0,4.0,5.0,5.0', u'6.0,6.0,6.0,7.0,6.0', |
+
|            | u'6.0,6.0,6.0,7.0,6.0', u'8.0,8.0,8.0,7.0,7.0'] |
|            | u'8.0,8.0,8.0,7.0,7.0']                         |
+
| id          | b576d46c-01f7-4020-903b-eb79662e3faa            |
| id          | cfd6f339-5c94-4e1b-a718-11973fe36d95            |
+
| name        | sample-data                                     |
| name        | sample-data                                     |
+
| project_id  | 4034bb3cd9324776a68c140fdd18baa4                |
| project_id  | a48633f159924753838dc062e070c539                |
+
| status      | available                                       |
| status      | available                                       |
+
| stderr      |                                                 |
| stderr      |                                                 |
+
| user_id    | 64ad04e101df40b4b51e5f370a83412c                |
| user_id    | 46b1440477394fa3a1cbe64a9c50b192                |
+
+-------------+--------------------------------------------------+
+-------------+-------------------------------------------------+
 
 
</pre>
 
</pre>
  

Revision as of 23:39, 15 December 2016

Classify Users by Skill using Meteos

In this example, you create a prediction model that classifies users by their skill using a KMeans model.

KMeans.png

1. Create a experiment template

Create a template for an experiment. An experiment is a workspace for machine learning.

You have to confirm the glance image id of the meteos image and a neutron network id before creating a template.

You can use a format located in python-meteosclient/sample/json/template.json

$ glance image-list | grep meteos
| a6b7de0b-10ff-462c-9c86-25c8a5622a57 | meteos                          |
$ neutron net-list | grep public
| 4222b557-6d9f-405c-b1ff-0f454d2f35bf | public  | 1f979ae3-d6b7-4d03-ba0f-9d9112581783             |
$ vim sample/json/template.json
$ cat sample/json/template.json
{
    "display_name": "example-template",
    "display_description": "This is a sample template of experiment",
    "image_id" : "a6b7de0b-10ff-462c-9c86-25c8a5622a57",
    "master_nodes_num": 1,
    "master_flavor_id": "4",
    "worker_nodes_num": 2,
    "worker_flavor_id": "2",
    "spark_version": "1.6.0",
    "floating_ip_pool": "4222b557-6d9f-405c-b1ff-0f454d2f35bf"
}
$ meteos template-create --json sample/json/template.json
+---------------+-----------------------------------------+
| Property      | Value                                   |
+---------------+-----------------------------------------+
| cluster_id    | None                                    |
| created_at    | 2016-12-15T22:55:03.000000              |
| description   | This is a sample template of experiment |
| id            | d3180a78-74cc-432d-9e9d-68640b18abae    |
| master_flavor | 4                                       |
| master_nodes  | 1                                       |
| name          | example-template                        |
| project_id    | 4034bb3cd9324776a68c140fdd18baa4        |
| spark_version | 1.6.0                                   |
| status        | available                               |
| user_id       | 64ad04e101df40b4b51e5f370a83412c        |
| worker_flavor | 2                                       |
| worker_nodes  | 2                                       |
+---------------+-----------------------------------------+

2. Create a experiment from template

Create an experiment by using the template created in the above step. You have to confirm a neutron private network id and create a keypair before creating an experiment.

You can use a format located in python-meteosclient/sample/json/experiment.json

$ nova keypair-add key1 > ~/key1.pem && chmod 600 ~/key1.pem
$ neutron net-list | grep private
| e4fcc49c-48e5-48f8-9599-bb5eba1339c9 | private | e15c24a5-dfdd-4428-b27d-9827b35600c0 10.0.0.0/26 |
$ vim sample/json/experiment.json
$ cat sample/json/experiment.json
{
    "display_name": "example-experiment",
    "display_description": "This is a sample experiment",
    "key_name": "key1",
    "neutron_management_network": "e4fcc49c-48e5-48f8-9599-bb5eba1339c9",
    "template_id": "d3180a78-74cc-432d-9e9d-68640b18abae"
}
$ meteos experiment-create --json sample/json/experiment.json
+--------------------+--------------------------------------+
| Property           | Value                                |
+--------------------+--------------------------------------+
| created_at         | 2016-12-15T22:56:51.000000           |
| description        | This is a sample experiment          |
| id                 | fcc8d055-e801-4652-af8c-5aabedbf0286 |
| key_name           | key1                                 |
| management_network | e4fcc49c-48e5-48f8-9599-bb5eba1339c9 |
| name               | example-experiment                   |
| project_id         | 4034bb3cd9324776a68c140fdd18baa4     |
| status             | creating                             |
| user_id            | 64ad04e101df40b4b51e5f370a83412c     |
+--------------------+--------------------------------------+

Meteos creates an experiment using the OpenStack Sahara spark plugin.

You can see a sahara cluster and nova VMs created by Meteos as below.

$ openstack dataprocessing cluster list (or sahara cluster-list)
+------------------+--------------------------------------+-------------+----------------+----------+
| Name             | Id                                   | Plugin name | Plugin version | Status   |
+------------------+--------------------------------------+-------------+----------------+----------+
| cluster-fcc8d055 | 5736d157-ac7c-41de-8aca-78f7afa7e99c | spark       | 1.6.0          | Spawning |
+------------------+--------------------------------------+-------------+----------------+----------+
$ openstack server list (or nova list)
+--------------------------------------+----------------------------+--------+------------+-------------+------------------+
| ID                                   | Name                       | Status | Task State | Power State | Networks         |
+--------------------------------------+----------------------------+--------+------------+-------------+------------------+
| 48a9f429-7756-4bed-8dd6-6dc6140ef897 | cluster-fcc8d055-master-0  | ACTIVE | -          | Running     | private=10.0.0.5 |
| 88ff2070-dfe8-45da-aa5c-02ac3e9de3b8 | cluster-fcc8d055-workers-0 | ACTIVE | -          | Running     | private=10.0.0.7 |
| a57dfa5d-8b55-47c7-aae7-d5b3c8779787 | cluster-fcc8d055-workers-1 | ACTIVE | -          | Running     | private=10.0.0.4 |
+--------------------------------------+----------------------------+--------+------------+-------------+------------------+

3. Upload a raw data

Upload raw data (in this example, user skill data) to OpenStack Swift.

You can use the sample data located in python-meteosclient/sample/data/kmeans_data.txt

The raw data shows "UserID", "Rank of Skill A", "Rank of Skill B", ... from left.

$ cd sample/data/
/sample/data$ head kmeans_data.txt
1,1.0,1.0,0.0,0.0,0.0
2,3.0,3.0,2.0,3.0,3.0
3,5.0,5.0,5.0,4.0,5.0
4,7.0,7.0,6.0,6.0,7.0
5,7.0,8.0,7.0,8.0,8.0
6,0.0,1.0,1.0,0.0,1.0
7,3.0,2.0,2.0,2.0,3.0
8,4.0,4.0,4.0,5.0,5.0
9,6.0,6.0,6.0,7.0,6.0
10,8.0,8.0,8.0,7.0,7.0
/sample/data$ swift upload meteos kmeans_data.txt
kmeans_data.txt

4. Parse a raw data

Parse the raw data to enable the prediction model to handle it.

The KMeans model requires only the parameters to classify, so you have to eliminate UserID from the raw data using the map method.

You can use a sample format located in python-meteosclient/sample/json/dataset_parse.json

You can see the head data of parsed dataset by executing "meteos dataset-show <dataset-uuid>" command.

$ vim ../python-meteosclient/sample/json/dataset_parse.json
$ cat ../python-meteosclient/sample/json/dataset_parse.json
{
    "source_dataset_url": "swift://meteos/kmeans_data.txt",
    "display_name": "sample-data",
    "display_description": "This is a sample dataset",
    "method": "parse",
    "params": [{"method": "map", "args": "lambda l: l.split(',',1)[1]"}],
    "experiment_id": "fcc8d055-e801-4652-af8c-5aabedbf0286",
    "swift_tenant": "demo",
    "swift_username": "demo",
    "swift_password": "nova"
}
$ meteos dataset-create --json sample/json/dataset_parse.json
+-------------+--------------------------------------+
| Property    | Value                                |
+-------------+--------------------------------------+
| created_at  | 2016-12-15T23:36:11.000000           |
| description | This is a sample dataset             |
| head        | None                                 |
| id          | b576d46c-01f7-4020-903b-eb79662e3faa |
| name        | sample-data                          |
| project_id  | 4034bb3cd9324776a68c140fdd18baa4     |
| status      | creating                             |
| stderr      | None                                 |
| user_id     | 64ad04e101df40b4b51e5f370a83412c     |
+-------------+--------------------------------------+
$ meteos dataset-list
+--------------------------------------+-------------+-----------+--------------------------------+
| id                                   | name        | status    | source_dataset_url             |
+--------------------------------------+-------------+-----------+--------------------------------+
| b576d46c-01f7-4020-903b-eb79662e3faa | sample-data | available | swift://meteos/kmeans_data.txt |
+--------------------------------------+-------------+-----------+--------------------------------+
$  meteos dataset-show b576d46c-01f7-4020-903b-eb79662e3faa
+-------------+--------------------------------------------------+
| Property    | Value                                            |
+-------------+--------------------------------------------------+
| created_at  | 2016-12-15T23:36:11.000000                       |
| description | This is a sample dataset                         |
| head        | [u'1.0,1.0,0.0,0.0,0.0', u'3.0,3.0,2.0,3.0,3.0', |
|             | u'5.0,5.0,5.0,4.0,5.0', u'7.0,7.0,6.0,6.0,7.0',  |
|             | u'7.0,8.0,7.0,8.0,8.0', u'0.0,1.0,1.0,0.0,1.0',  |
|             | u'3.0,2.0,2.0,2.0,3.0', u'4.0,4.0,4.0,5.0,5.0',  |
|             | u'6.0,6.0,6.0,7.0,6.0', u'8.0,8.0,8.0,7.0,7.0']  |
| id          | b576d46c-01f7-4020-903b-eb79662e3faa             |
| name        | sample-data                                      |
| project_id  | 4034bb3cd9324776a68c140fdd18baa4                 |
| status      | available                                        |
| stderr      |                                                  |
| user_id     | 64ad04e101df40b4b51e5f370a83412c                 |
+-------------+--------------------------------------------------+

5. Create a prediction model

In this example, the user creates a KMeans model from the parsed dataset.

The parsed dataset has already been distributed in the HDFS of the experiment environment.

So, you specify the internal url (internal://<dataset-id>) in the source_dataset_url parameter.

You can also specify a numClasses parameter in model_params, which indicates how many classes you want to classify the data into.

$ vim sample/json/model_kmeans.json
$ cat sample/json/model_kmeans.json
{
    "display_name": "sample-kmeans-model",
    "display_description": "KMeans Model",
    "source_dataset_url": "internal://cfd6f339-5c94-4e1b-a718-11973fe36d95",
    "model_type": "KMeans",
    "model_params": "{'numClasses': 5}",
    "experiment_id": "91504a65-01cf-428f-81aa-596be7ca8619"
}
$ meteos model-create --json sample/json/model_kmeans.json
+-------------+--------------------------------------+
| Property    | Value                                |
+-------------+--------------------------------------+
| created_at  | 2016-12-05T04:57:32.000000           |
| description | KMeans Model                         |
| id          | cb829823-a57d-45f1-ae38-658677be085f |
| name        | sample-kmeans-model                  |
| params      | eydudW1DbGFzc2VzJzogNX0=             |
| project_id  | a48633f159924753838dc062e070c539     |
| status      | creating                             |
| stderr      | None                                 |
| stdout      | None                                 |
| type        | KMeans                               |
| user_id     | 46b1440477394fa3a1cbe64a9c50b192     |
+-------------+--------------------------------------+
$ meteos model-list
+--------------------------------------+---------------------+--------------+-----------+--------+-------------------------------------------------+----------------------------+
| id                                   | name                | description  | status    | type   | source_dataset_url                              | created_at                 |
+--------------------------------------+---------------------+--------------+-----------+--------+-------------------------------------------------+----------------------------+
| cb829823-a57d-45f1-ae38-658677be085f | sample-kmeans-model | KMeans Model | available | KMeans | internal://cfd6f339-5c94-4e1b-a718-11973fe36d95 | 2016-12-05T04:57:32.000000 |
+--------------------------------------+---------------------+--------------+-----------+--------+-------------------------------------------------+----------------------------+

6. Create a learning job

Create a learning job and retrieve the class.

Retrieve the output data as the stdout of the job execution.

The class number (stdout) output by the KMeans model just identifies the class; it does not indicate a rank.

$ vim sample/json/learning.json
$ cat sample/json/learning.json
{
    "display_name": "example-learning-job",
    "display_description": "This is a sample job",
    "model_id": "cb829823-a57d-45f1-ae38-658677be085f",
    "method": "predict",
    "args": "4.0,4.0,4.0,4.0,4.0"
}
$ meteos learning-create --json sample/json/learning.json
+-------------+--------------------------------------+
| Property    | Value                                |
+-------------+--------------------------------------+
| args        | NC4wLDQuMCw0LjAsNC4wLDQuMA==         |
| created_at  | 2016-12-05T05:11:05.000000           |
| description | This is a sample job                 |
| id          | a7167c29-fa47-4265-9295-94eafafe4422 |
| method      | predict                              |
| name        | example-learning-job                 |
| project_id  | a48633f159924753838dc062e070c539     |
| status      | creating                             |
| stderr      | None                                 |
| stdout      | None                                 |
| user_id     | 46b1440477394fa3a1cbe64a9c50b192     |
+-------------+--------------------------------------+
$ meteos learning-list
+--------------------------------------+-------------+----------------------+-----------+---------------------+--------+
| id                                   | name        | description          | status    | args                | stdout |
+--------------------------------------+-------------+----------------------+-----------+---------------------+--------+
| 52106fde-7b0f-490a-892a-33d60019a82a | example-job | This is a sample job | available | 7.0,7.0,7.0,7.0,7.0 | 3      |
| 645e1411-91ca-41d6-a171-3bceaafee4ce | example-job | This is a sample job | available | 5.0,5.0,5.0,5.0,5.0 | 4      |
| a7167c29-fa47-4265-9295-94eafafe4422 | example-job | This is a sample job | available | 4.0,4.0,4.0,4.0,4.0 | 4      |
| c92dfd97-de46-4b7e-90f4-c4d2dc19581d | example-job | This is a sample job | available | 1.0,1.0,1.0,2.0,1.0 | 0      |
| f08c62a0-06a3-4940-ac06-733a00ec75ab | example-job | This is a sample job | available | 2.0,2.0,2.0,2.0,2.0 | 2      |
+--------------------------------------+-------------+----------------------+-----------+---------------------+--------+