Create new tasks

You can build a new task in CogDL. The BaseTask class is defined as follows:

class BaseTask(object):
    """Base class that every task derives from.

    Subclasses override ``add_args`` to declare their command-line options
    and ``train`` to run the task and report its evaluation results.
    """

    @staticmethod
    def add_args(parser):
        """Add task-specific arguments to the parser."""

    def __init__(self, args):
        """Create the task from the parsed arguments ``args``.

        The base implementation does nothing; subclasses build their own
        state here.
        """

    def train(self, num_epoch):
        """Run the task; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError

You can create a subclass that implements the ‘train’ method, like CommunityDetection, which gets the representation of each node and applies a clustering algorithm (K-means) to evaluate it.

# NOTE(review): for build_task to find this task by name it presumably has to
# be registered with CogDL's task registry — confirm against the CogDL version
# in use.
class CommunityDetection(BaseTask):
    """Community Detection task.

    Learns an embedding for every node and scores it by clustering the
    embeddings with K-means and comparing the clusters to the ground-truth
    labels using normalized mutual information (NMI).
    """

    @staticmethod
    def add_args(parser):
        """Add task-specific arguments to the parser."""
        parser.add_argument("--hidden-size", type=int, default=128)
        parser.add_argument("--num-shuffle", type=int, default=5)

    def __init__(self, args):
        """Build the dataset and model described by ``args``.

        Reads ``args.hidden_size`` and ``args.num_shuffle`` in addition to
        whatever ``build_dataset`` and ``build_model`` consume.
        """
        super(CommunityDetection, self).__init__(args)
        dataset = build_dataset(args)
        self.data = dataset[0]

        # Assumes data.y is a (num_nodes, num_classes) one-hot label matrix,
        # as in CogDL's datasets — TODO confirm for custom datasets.
        self.num_nodes, self.num_classes = self.data.y.shape
        self.label = np.argmax(self.data.y, axis=1)
        self.model = build_model(args)
        self.hidden_size = args.hidden_size
        self.num_shuffle = args.num_shuffle

    def train(self):
        """Train the embedding model and evaluate it with K-means.

        Returns:
            dict mapping ``"normalized_mutual_info_score <k>"`` to the NMI
            averaged over ``num_shuffle`` K-means runs, for each cluster
            count ``k``.
        """
        G = nx.Graph()
        # Assumes data.edge_index is a 2 x num_edges tensor of node-id pairs
        # — TODO confirm. Without this the model would train on an empty graph.
        G.add_edges_from(self.data.edge_index.t().tolist())
        embeddings = self.model.train(G)

        clusters = [30, 50, 70]
        all_results = defaultdict(list)
        for num_cluster in clusters:
            for _ in range(self.num_shuffle):
                model = KMeans(n_clusters=num_cluster).fit(embeddings)
                nmi_score = normalized_mutual_info_score(self.label, model.labels_)
                # Keep every run so the scores can be averaged per cluster count.
                all_results[num_cluster].append(nmi_score)

        return {
            f"normalized_mutual_info_score {num_cluster}": sum(scores) / len(scores)
            for num_cluster, scores in sorted(all_results.items())
        }

After creating your own task, you can run it on different models and datasets. You can use the ‘build_model’, ‘build_dataset’, and ‘build_task’ functions to build them with the corresponding hyper-parameters.

from cogdl.tasks import build_task
from cogdl.datasets import build_dataset
from cogdl.models import build_model
from cogdl.utils import build_args_from_dict

def run_deepwalk_ppi():
    """Run the community-detection task on the PPI dataset.

    NOTE(review): despite the function name, the configured model is
    'spectral', not DeepWalk — confirm which one is intended.

    Returns:
        The evaluation results returned by ``task.train()``.
    """
    default_dict = {'hidden_size': 64, 'num_shuffle': 1, 'cpu': True}
    args = build_args_from_dict(default_dict)

    # model, dataset and task parameters
    args.model = 'spectral'
    args.dataset = 'ppi'
    args.task = 'community_detection'

    # build model, dataset and task
    # (dataset and model are built here to demonstrate the builder functions;
    # they are not passed on to build_task below)
    dataset = build_dataset(args)
    model = build_model(args)
    task = build_task(args)

    # train model and get evaluate results
    ret = task.train()
    # Hand the results back to the caller instead of discarding them.
    return ret