@@ -1843,144 +1843,6 @@ def show_models(self) -> Dict[int, Any]:
18431843
18441844 A model dictionary contains the following:
18451845
1846- * ``"model_id"`` - The id given to a model by ``autosklearn``.
1847- * ``"rank"`` - The rank of the model based on its ``"cost"``.
1848- * ``"cost"`` - The loss of the model on the validation set.
1849- * ``"ensemble_weight"`` - The weight given to the model in the ensemble.
1850- * ``"voting_model"`` - The ``cv_voting_ensemble`` model (for 'cv' resampling).
1851- * ``"estimators"`` - List of models (dicts) in ``cv_voting_ensemble`` (for 'cv' resampling).
1852- * ``"data_preprocessor"`` - The preprocessor used on the data.
1853- * ``"balancing"`` - The balancing used on the data (for classification).
1854- * ``"feature_preprocessor"`` - The preprocessor for feature types.
1855- * ``"classifier"`` or ``"regressor"`` - The autosklearn wrapped classifier or regressor.
1856- * ``"sklearn_classifier"`` or ``"sklearn_regressor"`` - The sklearn classifier or regressor.
1857-
1858- **Example**
1859-
1860- .. code-block:: python
1861-
1862- import sklearn.datasets
1863- import sklearn.metrics
1864- import autosklearn.regression
1865-
1866- X, y = sklearn.datasets.load_diabetes(return_X_y=True)
1867-
1868- automl = autosklearn.regression.AutoSklearnRegressor(
1869- time_left_for_this_task=120
1870- )
1871- automl.fit(X, y, dataset_name='diabetes')
1872-
1873- ensemble_dict = automl.show_models()
1874- print(ensemble_dict)
1875-
1876- Output:
1877-
1878- .. code-block:: text
1879-
1880- {
1881- 25: {'model_id': 25.0,
1882- 'rank': 1,
1883- 'cost': 0.43667876507897496,
1884- 'ensemble_weight': 0.38,
1885- 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing....>,
1886- 'feature_preprocessor': <autosklearn.pipeline.components....>,
1887- 'regressor': <autosklearn.pipeline.components.regression....>,
1888- 'sklearn_regressor': SGDRegressor(alpha=0.0006517033225329654,...)
1889- },
1890- 6: {'model_id': 6.0,
1891- 'rank': 2,
1892- 'cost': 0.4550418898836528,
1893- 'ensemble_weight': 0.3,
1894- 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing....>,
1895- 'feature_preprocessor': <autosklearn.pipeline.components....>,
1896- 'regressor': <autosklearn.pipeline.components.regression....>,
1897- 'sklearn_regressor': ARDRegression(alpha_1=0.0003701926442639788,...)
1898- }...
1899- }
1900-
1901- Returns
1902- -------
1903- Dict(int, Any) : dictionary of length = number of models in the ensemble
1904- A dictionary of models in the ensemble, where ``model_id`` is the key.
1905-
1906- """
1907-
1908- ensemble_dict = {}
1909-
1910- def has_key (rv , key ):
1911- return rv .additional_info and key in rv .additional_info
1912-
1913- table_dict = {}
1914- for rkey , rval in self .runhistory_ .data .items ():
1915- if has_key (rval , 'num_run' ):
1916- model_id = rval .additional_info ['num_run' ]
1917- table_dict [model_id ] = {
1918- 'model_id' : model_id ,
1919- 'cost' : rval .cost
1920- }
1921-
1922- # Checking if the dictionary is empty
1923- if not table_dict :
1924- raise RuntimeError ('No model found. Try increasing \' time_left_for_this_task\' .' )
1925-
1926- for i , weight in enumerate (self .ensemble_ .weights_ ):
1927- (_ , model_id , _ ) = self .ensemble_ .identifiers_ [i ]
1928- table_dict [model_id ]['ensemble_weight' ] = weight
1929-
1930- table = pd .DataFrame .from_dict (table_dict , orient = 'index' )
1931-
1932- # Checking which resampling strategy is chosen and selecting the appropriate models
1933- is_cv = (self ._resampling_strategy == "cv" )
1934- models = self .cv_models_ if is_cv else self .models_
1935-
1936- rank = 1 # Initializing rank for the first model
1937- for (_ , model_id , _ ), model in models .items ():
1938- model_dict = {} # Declaring model dictionary
1939-
1940- # Inserting model_id, rank, cost and ensemble weight
1941- model_dict ['model_id' ] = table .loc [model_id ]['model_id' ].astype (int )
1942- model_dict ['rank' ] = rank
1943- model_dict ['cost' ] = table .loc [model_id ]['cost' ]
1944- model_dict ['ensemble_weight' ] = table .loc [model_id ]['ensemble_weight' ]
1945- rank += 1 # Incrementing rank by 1 for the next model
1946-
1947- # The steps in the models pipeline are as follows:
1948- # 'data_preprocessor': DataPreprocessor,
1949- # 'balancing': Balancing,
1950- # 'feature_preprocessor': FeaturePreprocessorChoice,
1951- # 'classifier'/'regressor': ClassifierChoice/RegressorChoice (autosklearn wrapped model)
1952-
1953- # For 'cv' (cross validation) strategy
1954- if is_cv :
1955- # Voting model created by cross validation
1956- cv_voting_ensemble = model
1957- model_dict ['voting_model' ] = cv_voting_ensemble
1958-
1959- # List of models, each trained on one cv fold
1960- cv_models = []
1961- for cv_model in cv_voting_ensemble .estimators_ :
1962- estimator = dict (cv_model .steps )
1963-
1964- # Adding sklearn model to the model dictionary
1965- model_type , autosklearn_wrapped_model = cv_model .steps [- 1 ]
1966- estimator [f'sklearn_{ model_type } ' ] = autosklearn_wrapped_model .choice .estimator
1967- cv_models .append (estimator )
1968- model_dict ['estimators' ] = cv_models
1969-
1970- # For any other strategy
1971- else :
1972- steps = dict (model .steps )
1973- model_dict .update (steps )
1974-
1975- # Adding sklearn model to the model dictionary
1976- model_type , autosklearn_wrapped_model = model .steps [- 1 ]
1977- model_dict [f'sklearn_{ model_type } ' ] = autosklearn_wrapped_model .choice .estimator
1978-
1979- # Inserting model_dict into the ensemble dictionary
1980- ensemble_dict [model_id ] = model_dict
1981-
1982- return ensemble_dict
1983-
19841846 def _create_search_space(
19851847 self,
19861848 tmp_dir,
0 commit comments