Code:
import logging
from ludwig.api import LudwigModel
from ludwig.datasets import agnews
# Loads the dataset as a pandas.DataFrame
train_df, test_df, _ = agnews.load(split=True)
# Prints a preview of the first five rows.
train_df.head(5)
config = {
    "input_features": [
        {
            "name": "title",  # The name of the input column
            "type": "text",   # Data type of the input column
            "encoder": {
                "type": "auto_transformer",  # The model architecture to use
                "pretrained_model_name_or_path": "bigscience/bloom-3b",
                "trainable": True,
            },
        },
    ],
    "output_features": [
        {
            "name": "class",
            "type": "category",
        }
    ],
    "trainer": {
        "learning_rate": 0.00001,
        "epochs": 3,  # We'll train for three epochs. Training longer might give
                      # better performance.
    },
    "backend": {
        "type": "ray",
        "trainer": {
            "strategy": "fsdp",  # fsdp distributed strategy for using a multi-GPU cluster
        },
    },
}
model = LudwigModel(config, logging_level=logging.INFO)
train_stats, preprocessed_data, output_directory = model.train(dataset=train_df)
Error:
2023-04-12 02:50:23,717 WARNING read_api.py:330 -- ⚠️ The number of blocks in this dataset (1) limits its parallelism to 1 concurrent tasks. This is much less than the number of available CPU slots in the cluster. Use .repartition(n) to increase the number of dataset blocks.
Parquet Files Sample: 100%|██████████| 1/1 [00:00<00:00, 2.12it/s]
Parquet Files Sample: 0%| | 0/1 [00:00<?, ?it/s]
(_sample_piece pid=2075, ip=172.31.47.162) 2023-04-12 02:50:24,737 INFO worker.py:772 -- Task failed with retryable exception: TaskID(45b3d0fcab720f49ffffffffffffffffffffffff01000000).
(_sample_piece pid=2075, ip=172.31.47.162) Traceback (most recent call last):
(_sample_piece pid=2075, ip=172.31.47.162) File "python/ray/_raylet.pyx", line 857, in ray._raylet.execute_task
(_sample_piece pid=2075, ip=172.31.47.162) File "python/ray/_raylet.pyx", line 861, in ray._raylet.execute_task
(_sample_piece pid=2075, ip=172.31.47.162) File "/home/ray/anaconda3/lib/python3.8/site-packages/ray/data/datasource/parquet_datasource.py", line 461, in _sample_piece
(_sample_piece pid=2075, ip=172.31.47.162) piece = piece.subset(row_group_ids=[0])
(_sample_piece pid=2075, ip=172.31.47.162) File "pyarrow/_dataset_parquet.pyx", line 424, in pyarrow._dataset_parquet.ParquetFileFragment.subset
(_sample_piece pid=2075, ip=172.31.47.162) File "pyarrow/error.pxi", line 143, in pyarrow.lib.pyarrow_internal_check_status
(_sample_piece pid=2075, ip=172.31.47.162) File "pyarrow/_fs.pyx", line 1179, in pyarrow._fs._cb_open_input_file
(_sample_piece pid=2075, ip=172.31.47.162) File "/home/ray/anaconda3/lib/python3.8/site-packages/pyarrow/fs.py", line 394, in open_input_file
(_sample_piece pid=2075, ip=172.31.47.162) raise FileNotFoundError(path)
(_sample_piece pid=2075, ip=172.31.47.162) FileNotFoundError: /home/ray/1e10f286d91711edbbf702820dcb34a8.validation.parquet/part.00000000.parquet
2023-04-12 02:50:25,232 WARNING read_api.py:330 -- ⚠️ The number of blocks in this dataset (1) limits its parallelism to 1 concurrent tasks. This is much less than the number of available CPU slots in the cluster. Use .repartition(n) to increase the number of dataset blocks.
Parquet Files Sample: 100%|██████████| 1/1 [00:01<00:00, 1.49s/it]
Parquet Files Sample: 0%| | 0/1 [00:00<?, ?it/s]
(_sample_piece pid=2075, ip=172.31.47.162) 2023-04-12 02:50:25,243 INFO worker.py:772 -- Task failed with retryable exception: TaskID(06f28617326374dbffffffffffffffffffffffff01000000).
(_sample_piece pid=2075, ip=172.31.47.162) Traceback (most recent call last):
(_sample_piece pid=2075, ip=172.31.47.162) File "python/ray/_raylet.pyx", line 857, in ray._raylet.execute_task
(_sample_piece pid=2075, ip=172.31.47.162) File "python/ray/_raylet.pyx", line 861, in ray._raylet.execute_task
(_sample_piece pid=2075, ip=172.31.47.162) File "/home/ray/anaconda3/lib/python3.8/site-packages/ray/data/datasource/parquet_datasource.py", line 461, in _sample_piece
(_sample_piece pid=2075, ip=172.31.47.162) piece = piece.subset(row_group_ids=[0])
(_sample_piece pid=2075, ip=172.31.47.162) File "pyarrow/_dataset_parquet.pyx", line 424, in pyarrow._dataset_parquet.ParquetFileFragment.subset
(_sample_piece pid=2075, ip=172.31.47.162) File "pyarrow/error.pxi", line 143, in pyarrow.lib.pyarrow_internal_check_status
(_sample_piece pid=2075, ip=172.31.47.162) File "pyarrow/_fs.pyx", line 1179, in pyarrow._fs._cb_open_input_file
(_sample_piece pid=2075, ip=172.31.47.162) File "/home/ray/anaconda3/lib/python3.8/site-packages/pyarrow/fs.py", line 394, in open_input_file
(_sample_piece pid=2075, ip=172.31.47.162) raise FileNotFoundError(path)
(_sample_piece pid=2075, ip=172.31.47.162) FileNotFoundError: /home/ray/1e10f286d91711edbbf702820dcb34a8.test.parquet/part.00000000.parquet
2023-04-12 02:50:26,238 WARNING read_api.py:330 -- ⚠️ The number of blocks in this dataset (1) limits its parallelism to 1 concurrent tasks. This is much less than the number of available CPU slots in the cluster. Use .repartition(n) to increase the number of dataset blocks.
Parquet Files Sample: 100%|██████████| 1/1 [00:00<00:00, 1.00it/s]
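For reference, the fix that the repartition warning suggests looks like the sketch below. This is a hypothetical standalone example (the path and block count are placeholders, not values from this run); here the Parquet files are produced internally by Ludwig, so the call cannot simply be dropped into the script above.

# Hypothetical sketch of the warning's suggested fix -- the path and the
# block count are placeholders, not values from this issue.
import ray

ds = ray.data.read_parquet("/path/to/data.parquet")  # one file -> one block
ds = ds.repartition(8)  # split into 8 blocks so 8 tasks can run concurrently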
Dataset Statistics
╒════════════╤═══════════════╤════════════════════╕
│ Dataset │ Size (Rows) │ Size (In Memory) │
╞════════════╪═══════════════╪════════════════════╡
│ Training │ 80626 │ 12.44 Mb │
├────────────┼───────────────┼────────────────────┤
│ Validation │ 11383 │ 1.76 Mb │
├────────────┼───────────────┼────────────────────┤
│ Test │ 22890 │ 3.53 Mb │
╘════════════╧═══════════════╧════════════════════╛
╒═══════╕
│ MODEL │
╘═══════╛
After this, the BLOOM model downloads, but training never uses the cluster's resources.
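One way to confirm whether the cluster is actually idle is to query Ray's resource accounting while training runs. A minimal diagnostic sketch, assuming the script runs on a node that can reach the cluster:

# Minimal diagnostic sketch -- assumes a running Ray cluster reachable
# from this node.
import ray

ray.init(address="auto")          # attach to the existing cluster
print(ray.cluster_resources())    # total CPUs/GPUs the cluster advertises
print(ray.available_resources())  # currently free resources; if this stays
                                  # equal to the totals while "training",
                                  # no worker is doing any work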
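The FileNotFoundError above shows a remote worker (ip=172.31.47.162) failing to open a preprocessed split under /home/ray/, i.e. a file that appears to live only on another node's local disk. One possible workaround (an assumption, not a confirmed fix) is to stage the data on storage every node can read, such as S3, and pass paths instead of in-memory DataFrames; the bucket name below is a placeholder, and writing to S3 from pandas requires the s3fs package.

# Workaround sketch (assumption): put the data where every node can read it.
# "my-bucket" is a placeholder; pandas S3 I/O requires s3fs.
train_df.to_parquet("s3://my-bucket/agnews/train.parquet")
test_df.to_parquet("s3://my-bucket/agnews/test.parquet")

# LudwigModel.train() also accepts a dataset path instead of a DataFrame.
train_stats, preprocessed_data, output_directory = model.train(
    dataset="s3://my-bucket/agnews/train.parquet"
)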