When and how a read/write request actually gets serviced is entirely up to the request queue. Taking the MMC card on the Goldfish platform as an example, let's see how its request queue is set up:
mmc_blk_probe()
597 struct mmc_blk_data *md;
598 int err;
599
600 char cap_str[10];
601
602 /*
603 * Check that the card supports the command class(es) we need.
604 */
605 if (!(card->csd.cmdclass & CCC_BLOCK_READ))
606 return -ENODEV;
607
608 md = mmc_blk_alloc(card);
mmc_blk_probe->mmc_blk_alloc()
510 struct mmc_blk_data *md;
511 int devidx, ret;
512
513 devidx = find_first_zero_bit(dev_use, MMC_NUM_MINORS); // find an unused index in dev_use
514 if (devidx >= MMC_NUM_MINORS)
515 return ERR_PTR(-ENOSPC);
516 __set_bit(devidx, dev_use);
517
518 md = kzalloc(sizeof(struct mmc_blk_data), GFP_KERNEL);
519 if (!md) {
520 ret = -ENOMEM;
521 goto out;
522 }
523
524
525 /*
526 * Set the read-only status based on the supported commands
527 * and the write protect switch.
528 */
529 md->read_only = mmc_blk_readonly(card);
530
531 md->disk = alloc_disk(1 << MMC_SHIFT); // this structure is extremely important
532 if (md->disk == NULL) {
533 ret = -ENOMEM;
534 goto err_kfree;
535 }
536
537 spin_lock_init(&md->lock);
538 md->usage = 1; // incremented each time the device is "get"
539
540 ret = mmc_init_queue(&md->queue, card, &md->lock);
541 if (ret)
542 goto err_putdisk;
543
544 md->queue.issue_fn = mmc_blk_issue_rq;
545 md->queue.data = md;
546
547 md->disk->major = MMC_BLOCK_MAJOR; // incoming requests use this to locate the disk and get queued on disk->queue
548 md->disk->first_minor = devidx << MMC_SHIFT;
549 md->disk->fops = &mmc_bdops;
550 md->disk->private_data = md;
551 md->disk->queue = md->queue.queue; // ah, so this is where the disk and the queue are tied together
552 md->disk->driverfs_dev = &card->dev;
Line 531 allocates a disk with alloc_disk; this is the generic block device structure (struct gendisk). Every request, issued against the corresponding node under /dev/, is routed by major and minor number to the matching disk and then queued on disk->queue. When that request actually gets executed is decided entirely by this request queue. In which respects? Keep reading; once the whole flow has been traced we'll wrap up with a summary.
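To make the gendisk's role concrete, here is a minimal, hypothetical sketch (not the MMC driver itself) of how a block driver ties a gendisk to a request queue, the same way mmc_blk_alloc() does above; the sketch_* names and the major number are assumptions.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/spinlock.h>

#define SKETCH_MAJOR 240        /* assumption: 240-254 are set aside for local/experimental use */

static spinlock_t sketch_lock;
static struct request_queue *sketch_queue;
static struct gendisk *sketch_disk;

/* stands in for mmc_request(): the block layer calls this to drain the queue */
static void sketch_request_fn(struct request_queue *q)
{
        /* fetch requests with elv_next_request(q) and complete them here */
}

static struct block_device_operations sketch_fops = {
        .owner = THIS_MODULE,
};

static int __init sketch_init(void)
{
        spin_lock_init(&sketch_lock);

        sketch_queue = blk_init_queue(sketch_request_fn, &sketch_lock);
        if (!sketch_queue)
                return -ENOMEM;

        sketch_disk = alloc_disk(1);                    /* one minor, no partitions */
        if (!sketch_disk) {
                blk_cleanup_queue(sketch_queue);
                return -ENOMEM;
        }

        sketch_disk->major = SKETCH_MAJOR;              /* requests find us by major/minor */
        sketch_disk->first_minor = 0;
        sketch_disk->fops = &sketch_fops;
        sketch_disk->queue = sketch_queue;              /* and land on this queue */
        snprintf(sketch_disk->disk_name, sizeof(sketch_disk->disk_name), "sketchblk0");
        set_capacity(sketch_disk, 2048);                /* 2048 sectors * 512 B = 1 MiB */
        add_disk(sketch_disk);
        return 0;
}
module_init(sketch_init);
MODULE_LICENSE("GPL");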
mmc_blk_probe->mmc_blk_alloc()->mmc_init_queue()
125 mq->card = card;
126 mq->queue = blk_init_queue(mmc_request, lock);
127 if (!mq->queue)
128 return -ENOMEM;
129
130 mq->queue->queuedata = mq;
131 mq->req = NULL;
132
133 blk_queue_prep_rq(mq->queue, mmc_prep_request);
134 blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN, NULL);
135 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
mmc_init_queue creates the request queue by calling the generic blk_init_queue, and then sets a few MMC-specific parameters of its own.
The MMC-specific parameters are:
queue->request_fn = mmc_request
queue->prep_rq_fn = mmc_prep_request
queue->ordered = QUEUE_ORDERED_DRAIN
queue->next_ordered = QUEUE_ORDERED_DRAIN
queue->prepare_flush_fn = NULL
Besides these device-specific parameters there are also some generic ones, which are just as indispensable. Read on:
mmc_blk_probe->mmc_blk_alloc()->mmc_init_queue()->blk_init_queue
540 /**
541 * blk_init_queue – prepare a request queue for use with a block device
542 * @rfn: The function to be called to process requests that have been
543 * placed on the queue.
544 * @lock: Request queue spin lock
545 *
546 * Description:
547 * If a block device wishes to use the standard request handling procedures,
548 * which sorts requests and coalesces adjacent requests, then it must
549 * call blk_init_queue(). The function @rfn will be called when there
550 * are requests on the queue that need to be processed. If the device
551 * supports plugging, then @rfn may not be called immediately when requests
552 * are available on the queue, but may be called at some time later instead.
553 * Plugged queues are generally unplugged when a buffer belonging to one
554 * of the requests on the queue is needed, or due to memory pressure.
555 *
556 * @rfn is not required, or even expected, to remove all requests off the
557 * queue, but only as many as it can handle at a time. If it does leave
558 * requests on the queue, it is responsible for arranging that the requests
559 * get dealt with eventually.
560 *
561 * The queue spin lock must be held while manipulating the requests on the
562 * request queue; this lock will be taken also from interrupt context, so irq
563 * disabling is needed for it.
564 *
565 * Function returns a pointer to the initialized request queue, or %NULL if
566 * it didn’t succeed.
567 *
568 * Note:
569 * blk_init_queue() must be paired with a blk_cleanup_queue() call
570 * when the block device is deactivated (such as at module unload).
571 **/
572
573 struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
574 {
575 return blk_init_queue_node(rfn, lock, -1);
576 }
mmc_blk_probe->mmc_blk_alloc()->mmc_init_queue()->blk_init_queue->blk_init_queue_node
579 struct request_queue *
580 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
581 {
582 struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id);
583
584 if (!q)
585 return NULL;
586
587 q->node = node_id;
588 if (blk_init_free_list(q)) {
589 kmem_cache_free(blk_requestq_cachep, q);
590 return NULL;
591 }
592
593 /*
594 * if caller didn’t supply a lock, they get per-queue locking with
595 * our embedded lock
596 */
597 if (!lock)
598 lock = &q->__queue_lock;
599
600 q->request_fn = rfn;
601 q->prep_rq_fn = NULL;
602 q->unplug_fn = generic_unplug_device;
603 q->queue_flags = QUEUE_FLAG_DEFAULT;
604 q->queue_lock = lock;
605
606 blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
607
608 blk_queue_make_request(q, __make_request);
609 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
610
611 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
612 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
613
614 q->sg_reserved_size = INT_MAX;
615
616 blk_set_cmd_filter_defaults(&q->cmd_filter);
617
618 /*
619 * all done
620 */
621 if (!elevator_init(q, NULL)) {
622 blk_queue_congestion_threshold(q);
623 return q;
624 }
Here several generic parameters are set as well:
queue->unplug_fn = generic_unplug_device
queue->make_request_fn = __make_request, the thoroughly generic path
queue->seg_boundary_mask, the segment-merging boundary, defaulting to 0xFFFFFFFF
queue->max_segment_size, the largest segment size, 2^16 bytes (64 KB)
queue->max_hw_segments, the maximum number of hardware segments (128)
queue->max_phys_segments, the maximum number of physical segments (128)
Line 608, via blk_queue_make_request, also sets up a key piece of state, queue->unplug_timer, which determines when requests get executed.
Line 616 initializes the command filter flags, which come into play while requests are executed.
Line 621 sets up the I/O scheduler; the default here is "anticipatory", though switching it to noop also works. So what is the relationship between each block device's requests and the queues inside the elevator?
mmc_blk_probe->mmc_blk_alloc()->mmc_init_queue()->blk_init_queue->blk_init_queue_node->blk_queue_make_request
98 /**
99 * blk_queue_make_request – define an alternate make_request function for a device
100 * @q: the request queue for the device to be affected
101 * @mfn: the alternate make_request function
102 *
103 * Description:
104 * The normal way for &struct bios to be passed to a device
105 * driver is for them to be collected into requests on a request
106 * queue, and then to allow the device driver to select requests
107 * off that queue when it is ready. This works well for many block
108 * devices. However some block devices (typically virtual devices
109 * such as md or lvm) do not benefit from the processing on the
110 * request queue, and are served best by having the requests passed
111 * directly to them. This can be achieved by providing a function
112 * to blk_queue_make_request().
113 *
114 * Caveat:
115 * The driver that does this *must* be able to deal appropriately
116 * with buffers in “highmemory”. This can be accomplished by either calling
117 * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
118 * blk_queue_bounce() to create a buffer in normal memory.
119 **/
120 void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
121 {
122 /*
123 * set defaults
124 */
125 q->nr_requests = BLKDEV_MAX_RQ;
126 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
127 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
128 blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
129 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
130
131 q->make_request_fn = mfn;
132 q->backing_dev_info.ra_pages =
133 (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
134 q->backing_dev_info.state = 0;
135 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
136 blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
137 blk_queue_hardsect_size(q, 512);
138 blk_queue_dma_alignment(q, 511);
139 blk_queue_congestion_threshold(q);
140 q->nr_batching = BLK_BATCH_REQ;
141
142 q->unplug_thresh = 4; /* hmm */
143 q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */
144 if (q->unplug_delay == 0)
145 q->unplug_delay = 1;
146
147 q->unplug_timer.function = blk_unplug_timeout;
148 q->unplug_timer.data = (unsigned long)q;
149
150 /*
151 * by default assume old behaviour and bounce for any highmem page
152 */
153 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
154 }
Whenever a request is inserted while the queue is empty, blk_plug_device() is called:
__make_request
1248 if (!blk_queue_nonrot(q) && elv_queue_empty(q))
1249 blk_plug_device(q);
1250 add_request(q, req);
__make_request->blk_plug_device
205 /*
206 * “plug” the device if there are no outstanding requests: this will
207 * force the transfer to start only after we have put all the requests
208 * on the list.
209 *
210 * This is called with interrupts off and no requests on the queue and
211 * with the queue lock held.
212 */
213 void blk_plug_device(struct request_queue *q)
214 {
215 WARN_ON(!irqs_disabled());
216
217 /*
218 * don’t plug a stopped queue, it must be paired with blk_start_queue()
219 * which will restart the queueing
220 */
221 if (blk_queue_stopped(q))
222 return;
223
224 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
225 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
226 trace_block_plug(q);
227 }
228 }
Line 225 arms a timer via mod_timer. When the timer expires, timer.function runs, namely blk_unplug_timeout:
316 void blk_unplug_timeout(unsigned long data)
317 {
318 struct request_queue *q = (struct request_queue *)data;
319
320 trace_block_unplug_timer(q);
321 kblockd_schedule_work(q, &q->unplug_work);
322 }
At line 321 the timer handler in turn schedules q->unplug_work. And when was q->unplug_work set up?
Back in blk_init_queue_node, which we already passed through.
mmc_blk_probe->mmc_blk_alloc()->mmc_init_queue()->blk_init_queue->blk_init_queue_node
579 struct request_queue *
580 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
581 {
582 struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id);
mmc_blk_probe->mmc_blk_alloc()->mmc_init_queue()->blk_init_queue->blk_init_queue_node->blk_alloc_queue_node
508 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
509 {
510 struct request_queue *q;
511 int err;
512
513 q = kmem_cache_alloc_node(blk_requestq_cachep,
514 gfp_mask | __GFP_ZERO, node_id);
515 if (!q)
516 return NULL;
517
518 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
519 q->backing_dev_info.unplug_io_data = q;
520 err = bdi_init(&q->backing_dev_info);
521 if (err) {
522 kmem_cache_free(blk_requestq_cachep, q);
523 return NULL;
524 }
525
526 init_timer(&q->unplug_timer);
527 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
528 INIT_LIST_HEAD(&q->timeout_list);
529 INIT_WORK(&q->unplug_work, blk_unplug_work);
530
531 kobject_init(&q->kobj, &blk_queue_ktype);
532
533 mutex_init(&q->sysfs_lock);
534 spin_lock_init(&q->__queue_lock);
535
536 return q;
537 }
Line 529 initializes queue->unplug_work. (Curiously, line 527 sets up yet another timer; when does this q->timeout timer get used?)
The work function of queue->unplug_work is blk_unplug_work:
blk_unplug_timeout->blk_unplug_work
307 void blk_unplug_work(struct work_struct *work)
308 {
309 struct request_queue *q =
310 container_of(work, struct request_queue, unplug_work);
311
312 trace_block_unplug_io(q);
313 q->unplug_fn(q);
314 }
Line 313 then calls q->unplug_fn, i.e. generic_unplug_device (assigned back in blk_init_queue_node).
blk_unplug_timeout->blk_unplug_work->generic_unplug_device
278 /**
279 * generic_unplug_device – fire a request queue
280 * @q: The &struct request_queue in question
281 *
282 * Description:
283 * Linux uses plugging to build bigger requests queues before letting
284 * the device have at them. If a queue is plugged, the I/O scheduler
285 * is still adding and merging requests on the queue. Once the queue
286 * gets unplugged, the request_fn defined for the queue is invoked and
287 * transfers started.
288 **/
289 void generic_unplug_device(struct request_queue *q)
290 {
291 if (blk_queue_plugged(q)) {
292 spin_lock_irq(q->queue_lock);
293 __generic_unplug_device(q);
294 spin_unlock_irq(q->queue_lock);
295 }
296 }
blk_unplug_timeout->blk_unplug_work->generic_unplug_device->__generic_unplug_device
268 void __generic_unplug_device(struct request_queue *q)
269 {
270 if (unlikely(blk_queue_stopped(q)))
271 return;
272 if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
273 return;
274
275 q->request_fn(q);
276 }
The request_fn invoked at line 275 is, for MMC, mmc_request.
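So the deferral chain traced so far is: blk_plug_device() arms q->unplug_timer, the timer fires blk_unplug_timeout(), which schedules q->unplug_work, and the work handler calls q->unplug_fn, which finally invokes q->request_fn. A condensed, stand-alone sketch of this timer-plus-workqueue pattern is below (the sketch_* names are hypothetical; the real block layer adds the queue lock and the PLUGGED flag on top of it):

#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct timer_list sketch_unplug_timer;
static struct work_struct sketch_unplug_work;

/* stands in for q->request_fn, e.g. mmc_request() */
static void sketch_request_fn(void)
{
        /* drain whatever has accumulated on the queue */
}

/* stands in for blk_unplug_work() -> q->unplug_fn(): runs in process context */
static void sketch_unplug_worker(struct work_struct *work)
{
        sketch_request_fn();
}

/* stands in for blk_unplug_timeout(): runs in timer (softirq) context,
 * so it only kicks a work item instead of doing the I/O itself */
static void sketch_unplug_timeout(unsigned long data)
{
        schedule_work(&sketch_unplug_work);
}

/* stands in for blk_plug_device(): (re)arm the timer when a request arrives
 * on an empty queue, batching whatever else shows up within the next ~3 ms */
static void sketch_plug(void)
{
        mod_timer(&sketch_unplug_timer, jiffies + msecs_to_jiffies(3));
}

static void sketch_setup(void)
{
        setup_timer(&sketch_unplug_timer, sketch_unplug_timeout, 0);
        INIT_WORK(&sketch_unplug_work, sketch_unplug_worker);
}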
blk_unplug_timeout->blk_unplug_work->generic_unplug_device->__generic_unplug_device->mmc_request
81 /*
82 * Generic MMC request handler. This is called for any queue on a
83 * particular host. When the host is not busy, we look for a request
84 * on any queue on this host, and attempt to issue it. This may
85 * not be the queue we were asked to process.
86 */
87 static void mmc_request(struct request_queue *q)
88 {
89 struct mmc_queue *mq = q->queuedata;
90 struct request *req;
91 int ret;
92
93 if (!mq) {
94 printk(KERN_ERR "MMC: killing requests for dead queue\n");
95 while ((req = elv_next_request(q)) != NULL) {
96 do {
97 ret = __blk_end_request(req, -EIO,
98 blk_rq_cur_bytes(req));
99 } while (ret);
100 }
101 return;
102 }
103
104 if (!mq->req)
105 wake_up_process(mq->thread); // hand the work off to the mmcqd thread
106 }
At line 105 the MMC layer simply wakes up mq->thread, i.e. the kernel thread created in:
mmc_init_queue
202 init_MUTEX(&mq->thread_sem);
203
204 mq->thread = kthread_run(mmc_queue_thread, mq, “mmcqd”);
205 if (IS_ERR(mq->thread)) {
206 ret = PTR_ERR(mq->thread);
207 goto free_bounce_sg;
208 }
blk_unplug_timeout->blk_unplug_work->generic_unplug_device->__generic_unplug_device->mmc_request->mmc_queue_thread
44 static int mmc_queue_thread(void *d)
45 {
46 struct mmc_queue *mq = d;
47 struct request_queue *q = mq->queue;
48
49 current->flags |= PF_MEMALLOC;
50
51 down(&mq->thread_sem);
52 do {
53 struct request *req = NULL;
54
55 spin_lock_irq(q->queue_lock);
56 set_current_state(TASK_INTERRUPTIBLE);
57 if (!blk_queue_plugged(q))
58 req = elv_next_request(q);
59 mq->req = req;
60 spin_unlock_irq(q->queue_lock);
61
62 if (!req) {
63 if (kthread_should_stop()) {
64 set_current_state(TASK_RUNNING);
65 break;
66 }
67 up(&mq->thread_sem);
68 schedule();
69 down(&mq->thread_sem);
70 continue;
71 }
72 set_current_state(TASK_RUNNING);
73
74 mq->issue_fn(mq, req);
75 } while (1);
76 up(&mq->thread_sem);
77
78 return 0;
79 }
80
So what this thread does is pull the next request off the request queue and hand it to mq->issue_fn, which was assigned in mmc_blk_probe->mmc_blk_alloc: it is mmc_blk_issue_rq. (A condensed sketch of this worker-thread pattern is shown right below.)
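As an aside, here is a minimal, hypothetical sketch of the same "sleep until woken, then process work" kthread pattern that mmc_queue_thread implements; the real code additionally holds the queue lock and the thread_sem shown above.

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>

static struct task_struct *sketch_thread;

/* stands in for mmc_queue_thread(): sleep until there is work, then handle it */
static int sketch_worker(void *data)
{
        while (!kthread_should_stop()) {
                set_current_state(TASK_INTERRUPTIBLE);
                /* if there is no pending request ... */
                schedule();                     /* sleep until wake_up_process() */
                __set_current_state(TASK_RUNNING);
                /* ... otherwise pull one request and issue it, like mq->issue_fn(mq, req) */
        }
        return 0;
}

static int sketch_start(void)
{
        sketch_thread = kthread_run(sketch_worker, NULL, "sketchd");
        return IS_ERR(sketch_thread) ? PTR_ERR(sketch_thread) : 0;
}

/* the submit path (like mmc_request()) then only has to do:
 *     wake_up_process(sketch_thread);
 */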
mmc_queue_thread->mmc_blk_issue_rq:
264 static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
265 {
266 struct mmc_blk_data *md = mq->data;
267 struct mmc_card *card = md->queue.card;
268 struct mmc_blk_request brq;
269 int ret = 1, disable_multi = 0;
270
271 #ifdef CONFIG_MMC_BLOCK_DEFERRED_RESUME
272 if (mmc_bus_needs_resume(card->host)) {
273 mmc_resume_bus(card->host);
274 mmc_blk_set_blksize(md, card);
275 }
276 #endif
277
278 mmc_claim_host(card->host);
279
280 do {
281 struct mmc_command cmd;
282 u32 readcmd, writecmd, status = 0;
283
284 memset(&brq, 0, sizeof(struct mmc_blk_request));
285 brq.mrq.cmd = &brq.cmd;
286 brq.mrq.data = &brq.data;
287
288 brq.cmd.arg = req->sector;
289 if (!mmc_card_blockaddr(card))
290 brq.cmd.arg <<= 9;
291 brq.cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
292 brq.data.blksz = 512;
293 brq.stop.opcode = MMC_STOP_TRANSMISSION;
294 brq.stop.arg = 0;
295 brq.stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
296 brq.data.blocks = req->nr_sectors;
297
298 /*
299 * The block layer doesn’t support all sector count
300 * restrictions, so we need to be prepared for too big
301 * requests.
302 */
303 if (brq.data.blocks > card->host->max_blk_count)
304 brq.data.blocks = card->host->max_blk_count;
305
306 /*
307 * After a read error, we redo the request one sector at a time
308 * in order to accurately determine which sectors can be read
309 * successfully.
310 */
311 if (disable_multi && brq.data.blocks > 1)
312 brq.data.blocks = 1;
313
314 if (brq.data.blocks > 1) {
315 /* SPI multiblock writes terminate using a special
316 * token, not a STOP_TRANSMISSION request.
317 */
318 if (!mmc_host_is_spi(card->host)
319 || rq_data_dir(req) == READ)
320 brq.mrq.stop = &brq.stop;
321 readcmd = MMC_READ_MULTIPLE_BLOCK;
322 writecmd = MMC_WRITE_MULTIPLE_BLOCK;
323 } else {
324 brq.mrq.stop = NULL;
325 readcmd = MMC_READ_SINGLE_BLOCK;
326 writecmd = MMC_WRITE_BLOCK;
327 }
328
329 if (rq_data_dir(req) == READ) {
330 brq.cmd.opcode = readcmd;
331 brq.data.flags |= MMC_DATA_READ;
332 } else {
333 brq.cmd.opcode = writecmd;
334 brq.data.flags |= MMC_DATA_WRITE;
335 }
336
337 mmc_set_data_timeout(&brq.data, card);
338
339 brq.data.sg = mq->sg;
340 brq.data.sg_len = mmc_queue_map_sg(mq);
341
342 /*
343 * Adjust the sg list so it is the same size as the
344 * request.
345 */
346 if (brq.data.blocks != req->nr_sectors) {
347 int i, data_size = brq.data.blocks << 9;
348 struct scatterlist *sg;
349
350 for_each_sg(brq.data.sg, sg, brq.data.sg_len, i) {
351 data_size -= sg->length;
352 if (data_size <= 0) {
353 sg->length += data_size;
354 i++;
355 break;
356 }
357 }
358 brq.data.sg_len = i;
359 }
360
361 mmc_queue_bounce_pre(mq);
362
363 mmc_wait_for_req(card->host, &brq.mrq);
364
365 mmc_queue_bounce_post(mq);
366
367 /*
368 * Check for errors here, but don’t jump to cmd_err
369 * until later as we need to wait for the card to leave
370 * programming mode even when things go wrong.
371 */
372 if (brq.cmd.error || brq.data.error || brq.stop.error) {
373 if (brq.data.blocks > 1 && rq_data_dir(req) == READ) {
374 /* Redo read one sector at a time */
375 printk(KERN_WARNING “%s: retrying using single “
376 “block readn”, req->rq_disk->disk_name);
377 disable_multi = 1;
378 continue;
379 }
380 status = get_card_status(card, req);
381 } else if (disable_multi == 1) {
382 disable_multi = 0;
383 }
384
385 if (brq.cmd.error) {
386 printk(KERN_ERR “%s: error %d sending read/write “
387 “command, response %#x, card status %#xn”,
388 req->rq_disk->disk_name, brq.cmd.error,
389 brq.cmd.resp[0], status);
390 }
391
392 if (brq.data.error) {
393 if (brq.data.error == -ETIMEDOUT && brq.mrq.stop)
394 /* ‘Stop’ response contains card status */
395 status = brq.mrq.stop->resp[0];
396 printk(KERN_ERR “%s: error %d transferring data,”
397 ” sector %u, nr %u, card status %#xn”,
398 req->rq_disk->disk_name, brq.data.error,
399 (unsigned)req->sector,
400 (unsigned)req->nr_sectors, status);
401 }
402
403 if (brq.stop.error) {
404 printk(KERN_ERR “%s: error %d sending stop command, “
405 “response %#x, card status %#xn”,
406 req->rq_disk->disk_name, brq.stop.error,
407 brq.stop.resp[0], status);
408 }
409
410 if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
411 do {
412 int err;
413
414 cmd.opcode = MMC_SEND_STATUS;
415 cmd.arg = card->rca << 16;
416 cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
417 err = mmc_wait_for_cmd(card->host, &cmd, 5);
418 if (err) {
419 printk(KERN_ERR “%s: error %d requesting statusn”,
420 req->rq_disk->disk_name, err);
421 goto cmd_err;
422 }
423 /*
424 * Some cards mishandle the status bits,
425 * so make sure to check both the busy
426 * indication and the card state.
427 */
428 } while (!(cmd.resp[0] & R1_READY_FOR_DATA) ||
429 (R1_CURRENT_STATE(cmd.resp[0]) == 7));
430
431 #if 0
432 if (cmd.resp[0] & ~0x00000900)
433 printk(KERN_ERR “%s: status = %08xn”,
434 req->rq_disk->disk_name, cmd.resp[0]);
435 if (mmc_decode_status(cmd.resp))
436 goto cmd_err;
437 #endif
438 }
439
440 if (brq.cmd.error || brq.stop.error || brq.data.error) {
441 if (rq_data_dir(req) == READ) {
442 /*
443 * After an error, we redo I/O one sector at a
444 * time, so we only reach here after trying to
445 * read a single sector.
446 */
447 spin_lock_irq(&md->lock);
448 ret = __blk_end_request(req, -EIO, brq.data.blksz);
449 spin_unlock_irq(&md->lock);
450 continue;
451 }
452 goto cmd_err;
453 }
454
455 /*
456 * A block was successfully transferred.
457 */
458 spin_lock_irq(&md->lock);
459 ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
460 spin_unlock_irq(&md->lock);
461 } while (ret);
462
463 mmc_release_host(card->host);
464
465 return 1;
466
467 cmd_err:
468 /*
469 * If this is an SD card and we’re writing, we can first
470 * mark the known good sectors as ok.
471 *
472 * If the card is not SD, we can still ok written sectors
473 * as reported by the controller (which might be less than
474 * the real number of written sectors, but never more).
475 */
476 if (mmc_card_sd(card)) {
477 u32 blocks;
478
479 blocks = mmc_sd_num_wr_blocks(card);
480 if (blocks != (u32)-1) {
481 spin_lock_irq(&md->lock);
482 ret = __blk_end_request(req, 0, blocks << 9);
483 spin_unlock_irq(&md->lock);
484 }
485 } else {
486 spin_lock_irq(&md->lock);
487 ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
488 spin_unlock_irq(&md->lock);
489 }
490
491 mmc_release_host(card->host);
492
493 spin_lock_irq(&md->lock);
494 while (ret)
495 ret = __blk_end_request(req, -EIO, blk_rq_cur_bytes(req));
496 spin_unlock_irq(&md->lock);
497
498 return 0;
499 }
500
All this function does is claim card->host and let the host carry out the operation. The crucial step is line 363: mmc_wait_for_req(card->host, &brq.mrq).
mmc_queue_thread->mmc_blk_issue_rq->mmc_wait_for_req
186 /**
187 * mmc_wait_for_req – start a request and wait for completion
188 * @host: MMC host to start command
189 * @mrq: MMC request to start
190 *
191 * Start a new MMC custom command request for a host, and wait
192 * for the command to complete. Does not attempt to parse the
193 * response.
194 */
195 void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq)
196 {
197 DECLARE_COMPLETION_ONSTACK(complete);
198
199 mrq->done_data = &complete;
200 mrq->done = mmc_wait_done;
201
202 mmc_start_request(host, mrq); // this may take a long time
203
204 wait_for_completion(&complete); // sleep until the transfer completes; the completion is signalled from the interrupt path
205 }
206
207 EXPORT_SYMBOL(mmc_wait_for_req);
mmc_start_request kicks the operation off. When is it finished? That's what the completion is for: when the data transfer ends, an interrupt increments the done counter inside the completion, this thread notices it, and wait_for_completion() returns.
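A minimal sketch of this completion hand-off, with hypothetical names (in the real driver the interrupt side runs goldfish_mmc_irq -> mmc_request_done -> mmc_wait_done):

#include <linux/completion.h>

static DECLARE_COMPLETION(sketch_done);

/* interrupt side: called once the hardware reports "transfer finished" */
static void sketch_irq_side(void)
{
        complete(&sketch_done);         /* done++ and wake the waiter */
}

/* process side: start the transfer, then sleep until the IRQ signals us */
static void sketch_submit_side(void)
{
        /* program_the_hardware();  -- hypothetical; it will eventually
         *                             cause sketch_irq_side() to run   */
        wait_for_completion(&sketch_done);
}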
Note: on 2012-08-24 I refreshed the formatting and removed the extra blank lines between code lines. This walkthrough was written back in my student days, so the analysis is fairly shallow and the code wasn't read in great detail; please forgive anything that misses the mark :).
NAND is a block device. So does a NAND block device, like other block devices, go through a "client/server"-style round trip on every read and write?
On the Goldfish platform, starting from NAND driver registration, let's see what reading and writing a yaffs2 file on top of NAND really looks like.
This article is mainly a record of the questions I ran into while studying. Like the earlier articles it is basically just the flow; for the underlying theory, please Google. Some code below is deliberately repeated so that readers don't have to keep jumping back and forth.
The code is from the Android 2.6.29 kernel. The write-up was done in a hurry, so mistakes in understanding are unavoidable; corrections are welcome.
Below is the execution flow of Android on the Goldfish platform:
<1>
377 static int __init init_mtdblock(void)
378 {
379 return register_mtd_blktrans(&mtdblock_tr);
380 }
Code snippet <1> registers mtdblock_tr, a struct mtd_blktrans_ops. This module is loaded during system boot, and from the name of its init function you can tell it targets MTD block devices. In Linux, NAND is classified as an MTD device: MTD is just a thin wrapper around the NAND device so that the upper layers see MTD rather than the raw NAND. Operating NAND through MTD still goes through the NAND driver's own functions in the end, so don't treat MTD as anything mysterious. If you want more background, Google it; I used to overthink this and never quite got it until I went and read the code.
The mtd_blktrans_ops structure looks like this:
<2>
32 struct mtd_blktrans_ops {
33 char *name;
34 int major;
35 int part_bits;
36 int blksize;
37 int blkshift;
38
39 /* Access functions */
40 int (*readsect)(struct mtd_blktrans_dev *dev,
41 unsigned long block, char *buffer);
42 int (*writesect)(struct mtd_blktrans_dev *dev,
43 unsigned long block, char *buffer);
44 int (*discard)(struct mtd_blktrans_dev *dev,
45 unsigned long block, unsigned nr_blocks);
46
47 /* Block layer ioctls */
48 int (*getgeo)(struct mtd_blktrans_dev *dev, struct hd_geometry *geo);
49 int (*flush)(struct mtd_blktrans_dev *dev);
50
51 /* Called with mtd_table_mutex held; no race with add/remove */
52 int (*open)(struct mtd_blktrans_dev *dev);
53 int (*release)(struct mtd_blktrans_dev *dev);
54
55 /* Called on {de,}registration and on subsequent addition/removal
56 of devices, with mtd_table_mutex held. */
57 void (*add_mtd)(struct mtd_blktrans_ops *tr, struct mtd_info *mtd);
58 void (*remove_dev)(struct mtd_blktrans_dev *dev);
59
60 struct list_head devs;
61 struct list_head list;
62 struct module *owner;
63
64 struct mtd_blkcore_priv *blkcore_priv;
65 };
Note struct mtd_blkcore_priv at line 64: it contains a read/write request queue, and all MTD devices of this transport share that single queue.
init_mtdblock->register_mtd_blktrans
340 int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
341 {
342 int ret, i;
343
344 /* Register the notifier if/when the first device type is
345 registered, to prevent the link/init ordering from fucking
346 us over. */
347 if (!blktrans_notifier.list.next)
348 register_mtd_user(&blktrans_notifier);
349
350 tr->blkcore_priv = kzalloc(sizeof(*tr->blkcore_priv), GFP_KERNEL); // essentially the per-transport queue state
351 if (!tr->blkcore_priv)
352 return -ENOMEM;
353
354 mutex_lock(&mtd_table_mutex);
355
356 ret = register_blkdev(tr->major, tr->name); // register a generic block device ("mtdblock")
357 if (ret) {
358 printk(KERN_WARNING "Unable to register %s block device on major %d: %dn",
359 tr->name, tr->major, ret);
360 kfree(tr->blkcore_priv);
361 mutex_unlock(&mtd_table_mutex);
362 return ret;
363 }
364 spin_lock_init(&tr->blkcore_priv->queue_lock);
365
366 tr->blkcore_priv->rq = blk_init_queue(mtd_blktrans_request, &tr->blkcore_priv->queue_lock);
367 if (!tr->blkcore_priv->rq) {
368 unregister_blkdev(tr->major, tr->name);
369 kfree(tr->blkcore_priv);
370 mutex_unlock(&mtd_table_mutex);
371 return -ENOMEM;
372 }
373
374 tr->blkcore_priv->rq->queuedata = tr;
375 blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize);
376 if (tr->discard)
377 blk_queue_set_discard(tr->blkcore_priv->rq,
378 blktrans_discard_request);
379
380 tr->blkshift = ffs(tr->blksize) - 1;
381
382 tr->blkcore_priv->thread = kthread_run(mtd_blktrans_thread, tr,
383 "%sd", tr->name);
384 if (IS_ERR(tr->blkcore_priv->thread)) {
385 blk_cleanup_queue(tr->blkcore_priv->rq);
386 unregister_blkdev(tr->major, tr->name);
387 kfree(tr->blkcore_priv);
388 mutex_unlock(&mtd_table_mutex);
389 return PTR_ERR(tr->blkcore_priv->thread);
390 }
391
392 INIT_LIST_HEAD(&tr->devs);
393 list_add(&tr->list, &blktrans_majors);
394
395 for (i=0; i<MAX_MTD_DEVICES; i++) {
396 if (mtd_table[i] && mtd_table[i]->type != MTD_ABSENT)
397 tr->add_mtd(tr, mtd_table[i]); // for every MTD device already present, add it (which ends up calling alloc_disk)
398 }
399
400 mutex_unlock(&mtd_table_mutex);
401
402 return 0;
403 }
Line 356 is worth noting: an extra mtdblock entry appears, named after the second argument. ^_^ (A tiny register_blkdev sketch follows after these notes.)
Line 366, as mentioned above, creates the shared read/write request queue.
Line 382 spawns a kernel thread; whenever a request is submitted, this thread gets to run. (Which raises a side question: after a kernel thread terminates on its own, how are its resources released?)
At lines 395~398 none of the NAND devices actually get added yet, apparently because the goldfish_nand driver hasn't been loaded at this point. So the work of register_mtd_blktrans ends here; its only contribution is registering an mtd_blktrans_ops.
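As referenced above, a tiny sketch of register_blkdev usage (hypothetical names; passing 0 asks the kernel to pick a free major, and the name is what shows up in /proc/devices):

#include <linux/fs.h>
#include <linux/module.h>

static int sketch_major;

static int __init sketch_blk_init(void)
{
        sketch_major = register_blkdev(0, "sketchblk");  /* 0 = allocate a major dynamically */
        if (sketch_major < 0)
                return sketch_major;
        return 0;
}

static void __exit sketch_blk_exit(void)
{
        unregister_blkdev(sketch_major, "sketchblk");
}

module_init(sketch_blk_init);
module_exit(sketch_blk_exit);
MODULE_LICENSE("GPL");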
Only when module_init(goldfish_nand_init) runs do the MTD devices start being added.
405 static int __init goldfish_nand_init(void)
406 {
407 return platform_driver_register(&goldfish_nand_driver);
408 }
After that, every device on the bus is walked until one matches goldfish_nand. Some readers may wonder: why doesn't the device go looking for a driver when it is registered? It does, but if the driver's code hasn't been loaded yet at that point, where would it find one?
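Here is a minimal, hypothetical sketch of the platform-bus pattern just described: the bus matches the driver's .name against the name of a registered platform_device and, on a match, calls probe.

#include <linux/platform_device.h>
#include <linux/module.h>

static int sketch_probe(struct platform_device *pdev)
{
        /* the bus found a platform_device whose name matches ours:
         * grab resources with platform_get_resource(), ioremap registers, ... */
        return 0;
}

static int sketch_remove(struct platform_device *pdev)
{
        return 0;
}

static struct platform_driver sketch_driver = {
        .probe  = sketch_probe,
        .remove = sketch_remove,
        .driver = {
                .name = "sketch_nand",  /* must match the platform_device name */
        },
};

static int __init sketch_init(void)
{
        /* registering the driver triggers matching against devices already on the bus */
        return platform_driver_register(&sketch_driver);
}

static void __exit sketch_exit(void)
{
        platform_driver_unregister(&sketch_driver);
}

module_init(sketch_init);
module_exit(sketch_exit);
MODULE_LICENSE("GPL");

With that pattern in mind, back to goldfish_pdev_worker, the Goldfish bus worker that registers the platform devices: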
58 static void goldfish_pdev_worker(struct work_struct *work)
59 {
60 int ret;
61 struct pdev_bus_dev *pos, *n;
62
63 list_for_each_entry_safe(pos, n, &pdev_bus_removed_devices, list) {
64 list_del(&pos->list);
65 platform_device_unregister(&pos->pdev);
66 kfree(pos);
67 }
68 list_for_each_entry_safe(pos, n, &pdev_bus_new_devices, list) {
69 list_del(&pos->list);
70 ret = platform_device_register(&pos->pdev);
71 if(ret) {
72 printk("goldfish_pdev_worker failed to register device, %sn", pos->pdev.name);
73 }
74 else {
75 printk("goldfish_pdev_worker registered %sn", pos->pdev.name);
76 }
77 list_add_tail(&pos->list, &pdev_bus_registered_devices);
78 }
79 }
Look at line 70: the device really is registered, and platform_device_register really does try to match a driver, only no driver exists yet, so the attempt comes up empty-handed. Then, every time a driver is registered, the bus searches for matching devices, and once a match is found the driver's probe function is called. For goldfish_nand_driver the probe function is:
goldfish_nand_probe
315 static int goldfish_nand_probe(struct platform_device *pdev)
316 {
317 uint32_t num_dev;
318 int i;
319 int err;
320 uint32_t num_dev_working;
321 uint32_t version;
322 struct resource *r;
323 struct goldfish_nand *nand;
324 unsigned char __iomem *base;
325
326 r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
327 if(r == NULL) {
328 err = -ENODEV;
329 goto err_no_io_base;
330 }
331
332 base = ioremap(r->start, PAGE_SIZE);
333 if(base == NULL) {
334 err = -ENOMEM;
335 goto err_ioremap;
336 }
337 version = readl(base + NAND_VERSION);
338 if(version != NAND_VERSION_CURRENT) {
339 printk("goldfish_nand_init: version mismatch, got %d, expected %dn",
340 version, NAND_VERSION_CURRENT);
341 err = -ENODEV;
342 goto err_no_dev;
343 }
344 num_dev = readl(base + NAND_NUM_DEV);
345 if(num_dev == 0) {
346 err = -ENODEV;
347 goto err_no_dev;
348 }
349
350 nand = kzalloc(sizeof(*nand) + sizeof(struct mtd_info) * num_dev, GFP_KERNEL);
351 if(nand == NULL) {
352 err = -ENOMEM;
353 goto err_nand_alloc_failed;
354 }
355 spin_lock_init(&nand->lock);
356 nand->base = base;
357 nand->mtd_count = num_dev;
358 platform_set_drvdata(pdev, nand);
359
360 num_dev_working = 0;
361 for(i = 0; i < num_dev; i++) {
362 err = goldfish_nand_init_device(nand, i);
363 if(err == 0)
364 num_dev_working++;
365 }
366 if(num_dev_working == 0) {
367 err = -ENODEV;
368 goto err_no_working_dev;
369 }
370 return 0;
371
372 err_no_working_dev:
373 kfree(nand);
374 err_nand_alloc_failed:
375 err_no_dev:
376 iounmap(base);
377 err_ioremap:
378 err_no_io_base:
379 return err;
380 }
Line 362 initializes each enumerated NAND device via goldfish_nand_init_device.
goldfish_nand_probe->goldfish_nand_init_device
248 static int goldfish_nand_init_device(struct goldfish_nand *nand, int id)
249 {
250 uint32_t name_len;
251 uint32_t result;
252 uint32_t flags;
253 unsigned long irq_flags;
254 unsigned char __iomem *base = nand->base;
255 struct mtd_info *mtd = &nand->mtd[id];
256 char *name;
257
258 spin_lock_irqsave(&nand->lock, irq_flags);
259 writel(id, base + NAND_DEV);
260 flags = readl(base + NAND_DEV_FLAGS);
261 name_len = readl(base + NAND_DEV_NAME_LEN);
262 mtd->writesize = readl(base + NAND_DEV_PAGE_SIZE);
263 mtd->size = readl(base + NAND_DEV_SIZE_LOW);
264 mtd->size |= (uint64_t)readl(base + NAND_DEV_SIZE_HIGH) << 32;
265 mtd->oobsize = readl(base + NAND_DEV_EXTRA_SIZE);
266 mtd->oobavail = mtd->oobsize;
267 mtd->erasesize = readl(base + NAND_DEV_ERASE_SIZE) /
268 (mtd->writesize + mtd->oobsize) * mtd->writesize;
269 do_div(mtd->size, mtd->writesize + mtd->oobsize);
270 mtd->size *= mtd->writesize;
271 printk("goldfish nand dev%d: size %llx, page %d, extra %d, erase %dn",
272 id, mtd->size, mtd->writesize, mtd->oobsize, mtd->erasesize);
273 spin_unlock_irqrestore(&nand->lock, irq_flags);
274
275 mtd->priv = nand;
276
277 mtd->name = name = kmalloc(name_len + 1, GFP_KERNEL);
278 if(name == NULL)
279 return -ENOMEM;
280
281 result = goldfish_nand_cmd(mtd, NAND_CMD_GET_DEV_NAME, 0, name_len, name);
282 if(result != name_len) {
283 kfree(mtd->name);
284 mtd->name = NULL;
285 printk("goldfish_nand_init_device failed to get dev name %d != %dn",
286 result, name_len);
287 return -ENODEV;
288 }
289 ((char *) mtd->name)[name_len] = '\0';
290
291 /* Setup the MTD structure */
292 mtd->type = MTD_NANDFLASH;
293 mtd->flags = MTD_CAP_NANDFLASH;
294 if(flags & NAND_DEV_FLAG_READ_ONLY)
295 mtd->flags &= ~MTD_WRITEABLE;
296
297 mtd->owner = THIS_MODULE;
298 mtd->erase = goldfish_nand_erase;
299 mtd->read = goldfish_nand_read;
300 mtd->write = goldfish_nand_write;
301 mtd->read_oob = goldfish_nand_read_oob;
302 mtd->write_oob = goldfish_nand_write_oob;
303 mtd->block_isbad = goldfish_nand_block_isbad;
304 mtd->block_markbad = goldfish_nand_block_markbad;
305
306 if (add_mtd_device(mtd)) {
307 kfree(mtd->name);
308 mtd->name = NULL;
309 return -EIO;
310 }
311
312 return 0;
313 }
Line 306 calls add_mtd_device:
goldfish_nand_probe->goldfish_nand_init_device->add_mtd_device
35 /**
36 * add_mtd_device – register an MTD device
37 * @mtd: pointer to new MTD device info structure
38 *
39 * Add a device to the list of MTD devices present in the system, and
40 * notify each currently active MTD 'user' of its arrival. Returns
41 * zero on success or 1 on failure, which currently will only happen
42 * if the number of present devices exceeds MAX_MTD_DEVICES (i.e. 16)
43 */
44
45 int add_mtd_device(struct mtd_info *mtd)
46 {
47 int i;
48
49 BUG_ON(mtd->writesize == 0);
50 mutex_lock(&mtd_table_mutex);
51
52 for (i=0; i < MAX_MTD_DEVICES; i++)
53 if (!mtd_table[i]) {
54 struct mtd_notifier *not;
55
56 mtd_table[i] = mtd;
57 mtd->index = i;
58 mtd->usecount = 0;
59
60 if (is_power_of_2(mtd->erasesize))
61 mtd->erasesize_shift = ffs(mtd->erasesize) - 1;
62 else
63 mtd->erasesize_shift = 0;
64
65 if (is_power_of_2(mtd->writesize))
66 mtd->writesize_shift = ffs(mtd->writesize) - 1;
67 else
68 mtd->writesize_shift = 0;
69
70 mtd->erasesize_mask = (1 << mtd->erasesize_shift) - 1;
71 mtd->writesize_mask = (1 << mtd->writesize_shift) - 1;
72
73 /* Some chips always power up locked. Unlock them now */
74 if ((mtd->flags & MTD_WRITEABLE)
75 && (mtd->flags & MTD_POWERUP_LOCK) && mtd->unlock) {
76 if (mtd->unlock(mtd, 0, mtd->size))
77 printk(KERN_WARNING
78 "%s: unlock failed, "
79 "writes may not workn",
80 mtd->name);
81 }
82
83 DEBUG(0, "mtd: Giving out device %d to %sn",i, mtd->name);
84 /* No need to get a refcount on the module containing
85 the notifier, since we hold the mtd_table_mutex */
86 list_for_each_entry(not, &mtd_notifiers, list)
87 {
88 not->add(mtd);
89 }
90
91 mutex_unlock(&mtd_table_mutex);
92 /* We _know_ we aren't being removed, because
93 our caller is still holding us here. So none
94 of this try_ nonsense, and no bitching about it
95 either. */
96 __module_get(THIS_MODULE);
97 return 0;
98 }
99
100 mutex_unlock(&mtd_table_mutex);
101 return 1;
102 }
103
Note line 88: what actually gets called there is blktrans_notify_add. Why that? During initialization the init_mtdblock module called register_mtd_blktrans(&mtdblock_tr), and mtdblock_tr defines the operations for mtdblock, among them add_mtd:
362 static struct mtd_blktrans_ops mtdblock_tr = {
363 .name = "mtdblock",
364 .major = 31,
365 .part_bits = 0,
366 .blksize = 512,
367 .open = mtdblock_open,
368 .flush = mtdblock_flush,
369 .release = mtdblock_release,
370 .readsect = mtdblock_readsect,
371 .writesect = mtdblock_writesect,
372 .add_mtd = mtdblock_add_mtd,
373 .remove_dev = mtdblock_remove_dev,
374 .owner = THIS_MODULE,
375 };
And during that initialization it also registered blktrans_notifier, a struct mtd_notifier. Why make it this roundabout? Mainly for extensibility.
335 static struct mtd_notifier blktrans_notifier = {
336 .add = blktrans_notify_add,
337 .remove = blktrans_notify_remove,
338 };
324 static void blktrans_notify_add(struct mtd_info *mtd)
325 {
326 struct mtd_blktrans_ops *tr;
327
328 if (mtd->type == MTD_ABSENT)
329 return;
330
331 list_for_each_entry(tr, &blktrans_majors, list)
332 tr->add_mtd(tr, mtd);
333 }
Back to the code above: the MTD device is added via not->add(mtd), and not->add in turn calls tr->add_mtd, which for mtdblock_tr is mtdblock_add_mtd. (A small notifier sketch follows before we continue.)
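For reference, a minimal sketch of hooking into this notifier mechanism with hypothetical callbacks; register_mtd_user() replays add() for every MTD device that already exists, which is exactly how blktrans_notifier later learns about the Goldfish NAND devices.

#include <linux/mtd/mtd.h>
#include <linux/module.h>

static void sketch_mtd_add(struct mtd_info *mtd)
{
        printk(KERN_INFO "sketch: mtd%d (%s) appeared\n", mtd->index, mtd->name);
}

static void sketch_mtd_remove(struct mtd_info *mtd)
{
        printk(KERN_INFO "sketch: mtd%d removed\n", mtd->index);
}

static struct mtd_notifier sketch_notifier = {
        .add    = sketch_mtd_add,
        .remove = sketch_mtd_remove,
};

static int __init sketch_init(void)
{
        /* add() fires immediately for existing devices, then again for new ones */
        register_mtd_user(&sketch_notifier);
        return 0;
}

static void __exit sketch_exit(void)
{
        unregister_mtd_user(&sketch_notifier);
}

module_init(sketch_init);
module_exit(sketch_exit);
MODULE_LICENSE("GPL");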
blktrans_notify_add->mtdblock_add_mtd
337 static void mtdblock_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
338 {
339 struct mtd_blktrans_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
340
341 if (!dev)
342 return;
343
344 dev->mtd = mtd;
345 dev->devnum = mtd->index;
346
347 dev->size = mtd->size >> 9;
348 dev->tr = tr;
349
350 if (!(mtd->flags & MTD_WRITEABLE))
351 dev->readonly = 1;
352
353 add_mtd_blktrans_dev(dev);
354 }
blktrans_notify_add->mtdblock_add_mtd->add_mtd_blktrans_dev
216 int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
217 {
218 struct mtd_blktrans_ops *tr = new->tr;
219 struct mtd_blktrans_dev *d;
220 int last_devnum = -1;
221 struct gendisk *gd;
222
223 if (mutex_trylock(&mtd_table_mutex)) {
224 mutex_unlock(&mtd_table_mutex);
225 BUG();
226 }
227
228 list_for_each_entry(d, &tr->devs, list) {
229 if (new->devnum == -1) {
230 /* Use first free number */
231 if (d->devnum != last_devnum+1) {
232 /* Found a free devnum. Plug it in here */
233 new->devnum = last_devnum+1;
234 list_add_tail(&new->list, &d->list);
235 goto added;
236 }
237 } else if (d->devnum == new->devnum) {
238 /* Required number taken */
239 return -EBUSY;
240 } else if (d->devnum > new->devnum) {
241 /* Required number was free */
242 list_add_tail(&new->list, &d->list);
243 goto added;
244 }
245 last_devnum = d->devnum;
246 }
247 if (new->devnum == -1)
248 new->devnum = last_devnum+1;
249
250 if ((new->devnum << tr->part_bits) > 256) {
251 return -EBUSY;
252 }
253
254 list_add_tail(&new->list, &tr->devs);
255 added:
256 mutex_init(&new->lock);
257 if (!tr->writesect)
258 new->readonly = 1;
259
260 gd = alloc_disk(1 << tr->part_bits); // ah, here is where the gendisk is allocated
261 if (!gd) {
262 list_del(&new->list);
263 return -ENOMEM;
264 }
265 gd->major = tr->major;
266 gd->first_minor = (new->devnum) << tr->part_bits;
267 gd->fops = &mtd_blktrans_ops;
268
269 if (tr->part_bits)
270 if (new->devnum < 26)
271 snprintf(gd->disk_name, sizeof(gd->disk_name),
272 "%s%c", tr->name, 'a' + new->devnum);
273 else
274 snprintf(gd->disk_name, sizeof(gd->disk_name),
275 "%s%c%c", tr->name,
276 'a' - 1 + new->devnum / 26,
277 'a' + new->devnum % 26);
278 else
279 snprintf(gd->disk_name, sizeof(gd->disk_name),
280 "%s%d", tr->name, new->devnum);
281
282 /* 2.5 has capacity in units of 512 bytes while still
283 having BLOCK_SIZE_BITS set to 10. Just to keep us amused. */
284 set_capacity(gd, (new->size * tr->blksize) >> 9);
285
286 gd->private_data = new;
287 new->blkcore_priv = gd;
288 gd->queue = tr->blkcore_priv->rq; // the disk uses tr's shared request queue
289
290 if (new->readonly)
291 set_disk_ro(gd, 1);
292
293 add_disk(gd); // hand the disk to the block layer
294
295 return 0;
296 }
Line 288 uses tr's queue, the one created when register_mtd_blktrans(&mtdblock_tr) ran at initialization:
340 int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
341 {
342 int ret, i;
343
344 /* Register the notifier if/when the first device type is
345 registered, to prevent the link/init ordering from fucking
346 us over. */
347 if (!blktrans_notifier.list.next)
348 register_mtd_user(&blktrans_notifier);
349
350 tr->blkcore_priv = kzalloc(sizeof(*tr->blkcore_priv), GFP_KERNEL); // essentially the per-transport queue state
351 if (!tr->blkcore_priv)
352 return -ENOMEM;
353
354 mutex_lock(&mtd_table_mutex);
355
356 ret = register_blkdev(tr->major, tr->name); // register a generic block device ("mtdblock")
357 if (ret) {
358 printk(KERN_WARNING "Unable to register %s block device on major %d: %dn",
359 tr->name, tr->major, ret);
360 kfree(tr->blkcore_priv);
361 mutex_unlock(&mtd_table_mutex);
362 return ret;
363 }
364 spin_lock_init(&tr->blkcore_priv->queue_lock);
365 tr->blkcore_priv->rq = blk_init_queue(mtd_blktrans_request, &tr->blkcore_priv->queue_lock);
The tr queue is entirely generic; only the request_fn differs. A setup this simple is rather unexpected. Even more surprising: MTD reads and writes don't actually go through the request machinery at all. So what path do they take?
We know that in Linux, file reads and writes pass through several layers: VFS on top, then the concrete filesystem, and the concrete filesystem decides whether the request path is used. So let's go straight to yaffs2's file_operations and work from there to see whether requests are ever involved.
Because the VFS in Linux has a page cache, and the data structure tied to the page cache is address_space (whose host is the inode), file reads and writes essentially end up going through the address_space operations.
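In other words, a filesystem that talks to its storage directly can satisfy ->readpage without ever building a struct request. A minimal, hypothetical sketch of such a readpage with 2.6.29-era APIs (yaffs_readpage does the same kind of thing, using its own chunk routines instead of memset):

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>

static int sketch_readpage(struct file *file, struct page *page)
{
        void *kaddr = kmap(page);

        /* fill the page straight from the backing store (e.g. via mtd->read()):
         * no bio, no request, no elevator involved */
        memset(kaddr, 0, PAGE_CACHE_SIZE);

        kunmap(page);
        flush_dcache_page(page);
        SetPageUptodate(page);
        unlock_page(page);
        return 0;
}

static const struct address_space_operations sketch_aops = {
        .readpage = sketch_readpage,
};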
Here is yaffs2's address_space_operations structure; whether it ultimately issues requests can be judged from its readpage alone:
270 static struct address_space_operations yaffs_file_address_operations = {
271 .readpage = yaffs_readpage,
272 .writepage = yaffs_writepage,
273 #if (YAFFS_USE_WRITE_BEGIN_END > 0)
274 .write_begin = yaffs_write_begin,
275 .write_end = yaffs_write_end,
276 #else
277 .prepare_write = yaffs_prepare_write,
278 .commit_write = yaffs_commit_write,
279 #endif
280 };
Since the yaffs read path mostly involves yaffs2-internal flow, we won't go into detail here. What stands out is that yaffs2 does not use the traditional page cache concept: the yaffs_device structure has a member "yaffs_ChunkCache *srCache;", whose type is:
111 /* Special sequence number for bad block that failed to be marked bad */
112 #define YAFFS_SEQUENCE_BAD_BLOCK 0xFFFF0000
113
114 /* ChunkCache is used for short read/write operations.*/
115 typedef struct {
116 struct yaffs_ObjectStruct *object;
117 int chunkId;
118 int lastUse;
119 int dirty;
120 int nBytes; /* Only valid if the cache is dirty */
121 int locked; /* Can't push out or flush while locked. */
122 #ifdef CONFIG_YAFFS_YAFFS2
123 __u8 *data;
124 #else
125 __u8 data[YAFFS_BYTES_PER_CHUNK];
126 #endif
127 } yaffs_ChunkCache;
Whenever it checks whether a matching entry already exists in this cache, the lookup function looks like this:
4015 /* Find a cached chunk */
4016 static yaffs_ChunkCache *yaffs_FindChunkCache(const yaffs_Object *obj,
4017 int chunkId)
4018 {
4019 yaffs_Device *dev = obj->myDev;
4020 int i;
4021 if (dev->nShortOpCaches > 0) {
4022 for (i = 0; i < dev->nShortOpCaches; i++) {
4023 if (dev->srCache[i].object == obj &&
4024 dev->srCache[i].chunkId == chunkId) {
4025 dev->cacheHits++;
4026
4027 return &dev->srCache[i];
4028 }
4029 }
4030 }
4031 return NULL;
4032 }
From this lookup you can see that, unlike ext2, it doesn't organize its cache with a hash or a radix tree; it is laid out the same way the NAND device itself is. Perhaps that is one reason yaffs2 is so portable: it isn't tied to Linux, or to whether MTD is available, and can be dropped into pretty much any OS.
So, coming back to the earlier discussion: the alloc_disk and request_queue set up before are essentially wasted on this path. Reading and writing the NAND device is nowhere near as involved as for other block devices; the request_fn we registered never even gets used.
This flow analysis targets the 2.6.29 kernel on the Goldfish platform.
Reading and writing an SD card, like any other block device, is an asynchronous process. After a process pushes a request onto the block device's request queue, the mq->thread process is woken up when the real I/O is to happen. Strictly speaking this is a kernel thread; its body is as follows:
44 static int mmc_queue_thread(void *d)
45 {
46 struct mmc_queue *mq = d;
47 struct request_queue *q = mq->queue;
48
49 current->flags |= PF_MEMALLOC;
50
51 down(&mq->thread_sem);
52 do {
53 struct request *req = NULL;
54
55 spin_lock_irq(q->queue_lock);
56 set_current_state(TASK_INTERRUPTIBLE);
57 if (!blk_queue_plugged(q))
58 req = elv_next_request(q);
59 mq->req = req;
60 spin_unlock_irq(q->queue_lock);
61
62 if (!req) {
63 if (kthread_should_stop()) {
64 set_current_state(TASK_RUNNING);
65 break;
66 }
67 up(&mq->thread_sem);
68 schedule();
69 down(&mq->thread_sem);
70 continue;
71 }
72 set_current_state(TASK_RUNNING);
73
74 mq->issue_fn(mq, req);
75 } while (1);
76 up(&mq->thread_sem);
77
78 return 0;
79 }
Lines 51 and 76 guarantee that only one thread operates on mq at a time. (A tiny semaphore sketch follows.)
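A minimal sketch of this semaphore-as-mutex usage (hypothetical names; mmc_init_queue uses init_MUTEX(&mq->thread_sem), which, like sema_init(..., 1) below, initializes the count to 1):

#include <linux/semaphore.h>

static struct semaphore sketch_sem;
static int sketch_shared_state;

static void sketch_setup(void)
{
        sema_init(&sketch_sem, 1);      /* count of 1 -> behaves like a mutex */
}

static void sketch_touch_shared_state(void)
{
        down(&sketch_sem);              /* only one thread gets past here at a time */
        sketch_shared_state++;
        up(&sketch_sem);
}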
Next, line 74 calls mq->issue_fn, namely mmc_blk_issue_rq:
264 static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
265 {
266 struct mmc_blk_data *md = mq->data;
267 struct mmc_card *card = md->queue.card;
268 struct mmc_blk_request brq;
269 int ret = 1, disable_multi = 0;
270
271 #ifdef CONFIG_MMC_BLOCK_DEFERRED_RESUME
272 if (mmc_bus_needs_resume(card->host)) {
273 mmc_resume_bus(card->host);
274 mmc_blk_set_blksize(md, card);
275 }
276 #endif
277
278 mmc_claim_host(card->host);
279
280 do {
281 struct mmc_command cmd;
282 u32 readcmd, writecmd, status = 0;
283
284 memset(&brq, 0, sizeof(struct mmc_blk_request));
285 brq.mrq.cmd = &brq.cmd;
286 brq.mrq.data = &brq.data;
287
288 brq.cmd.arg = req->sector;
289 if (!mmc_card_blockaddr(card))
290 brq.cmd.arg <<= 9;
291 brq.cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
292 brq.data.blksz = 512;
293 brq.stop.opcode = MMC_STOP_TRANSMISSION;
294 brq.stop.arg = 0;
295 brq.stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
296 brq.data.blocks = req->nr_sectors;
297
298 /*
299 * The block layer doesn't support all sector count
300 * restrictions, so we need to be prepared for too big
301 * requests.
302 */
303 if (brq.data.blocks > card->host->max_blk_count)
304 brq.data.blocks = card->host->max_blk_count;
305
306 /*
307 * After a read error, we redo the request one sector at a time
308 * in order to accurately determine which sectors can be read
309 * successfully.
310 */
311 if (disable_multi && brq.data.blocks > 1)
312 brq.data.blocks = 1;
313
314 if (brq.data.blocks > 1) {
315 /* SPI multiblock writes terminate using a special
316 * token, not a STOP_TRANSMISSION request.
317 */
318 if (!mmc_host_is_spi(card->host)
319 || rq_data_dir(req) == READ)
320 brq.mrq.stop = &brq.stop;
321 readcmd = MMC_READ_MULTIPLE_BLOCK;
322 writecmd = MMC_WRITE_MULTIPLE_BLOCK;
323 } else {
324 brq.mrq.stop = NULL;
325 readcmd = MMC_READ_SINGLE_BLOCK;
326 writecmd = MMC_WRITE_BLOCK;
327 }
328
329 if (rq_data_dir(req) == READ) {
330 brq.cmd.opcode = readcmd;
331 brq.data.flags |= MMC_DATA_READ;
332 } else {
333 brq.cmd.opcode = writecmd;
334 brq.data.flags |= MMC_DATA_WRITE;
335 }
336
337 mmc_set_data_timeout(&brq.data, card);
338
339 brq.data.sg = mq->sg;
340 brq.data.sg_len = mmc_queue_map_sg(mq);
341
342 /*
343 * Adjust the sg list so it is the same size as the
344 * request.
345 */
346 if (brq.data.blocks != req->nr_sectors) {
347 int i, data_size = brq.data.blocks << 9;
348 struct scatterlist *sg;
349
350 for_each_sg(brq.data.sg, sg, brq.data.sg_len, i) {
351 data_size -= sg->length;
352 if (data_size <= 0) {
353 sg->length += data_size;
354 i++;
355 break;
356 }
357 }
358 brq.data.sg_len = i;
359 }
360
361 mmc_queue_bounce_pre(mq);
362
363 mmc_wait_for_req(card->host, &brq.mrq);
364
365 mmc_queue_bounce_post(mq);
366
367 /*
368 * Check for errors here, but don't jump to cmd_err
369 * until later as we need to wait for the card to leave
370 * programming mode even when things go wrong.
371 */
372 if (brq.cmd.error || brq.data.error || brq.stop.error) {
373 if (brq.data.blocks > 1 && rq_data_dir(req) == READ) {
374 /* Redo read one sector at a time */
375 printk(KERN_WARNING "%s: retrying using single "
376 "block readn", req->rq_disk->disk_name);
377 disable_multi = 1;
378 continue;
379 }
380 status = get_card_status(card, req);
381 } else if (disable_multi == 1) {
382 disable_multi = 0;
383 }
384
385 if (brq.cmd.error) {
386 printk(KERN_ERR "%s: error %d sending read/write "
387 "command, response %#x, card status %#xn",
388 req->rq_disk->disk_name, brq.cmd.error,
389 brq.cmd.resp[0], status);
390 }
391
392 if (brq.data.error) {
393 if (brq.data.error == -ETIMEDOUT && brq.mrq.stop)
394 /* 'Stop' response contains card status */
395 status = brq.mrq.stop->resp[0];
396 printk(KERN_ERR "%s: error %d transferring data,"
397 " sector %u, nr %u, card status %#xn",
398 req->rq_disk->disk_name, brq.data.error,
399 (unsigned)req->sector,
400 (unsigned)req->nr_sectors, status);
401 }
402
403 if (brq.stop.error) {
404 printk(KERN_ERR "%s: error %d sending stop command, "
405 "response %#x, card status %#xn",
406 req->rq_disk->disk_name, brq.stop.error,
407 brq.stop.resp[0], status);
408 }
409
410 if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
411 do {
412 int err;
413
414 cmd.opcode = MMC_SEND_STATUS;
415 cmd.arg = card->rca << 16;
416 cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
417 err = mmc_wait_for_cmd(card->host, &cmd, 5);
418 if (err) {
419 printk(KERN_ERR "%s: error %d requesting statusn",
420 req->rq_disk->disk_name, err);
421 goto cmd_err;
422 }
423 /*
424 * Some cards mishandle the status bits,
425 * so make sure to check both the busy
426 * indication and the card state.
427 */
428 } while (!(cmd.resp[0] & R1_READY_FOR_DATA) ||
429 (R1_CURRENT_STATE(cmd.resp[0]) == 7));
430
431 #if 0
432 if (cmd.resp[0] & ~0x00000900)
433 printk(KERN_ERR "%s: status = %08xn",
434 req->rq_disk->disk_name, cmd.resp[0]);
435 if (mmc_decode_status(cmd.resp))
436 goto cmd_err;
437 #endif
438 }
439
440 if (brq.cmd.error || brq.stop.error || brq.data.error) {
441 if (rq_data_dir(req) == READ) {
442 /*
443 * After an error, we redo I/O one sector at a
444 * time, so we only reach here after trying to
445 * read a single sector.
446 */
447 spin_lock_irq(&md->lock);
448 ret = __blk_end_request(req, -EIO, brq.data.blksz);
449 spin_unlock_irq(&md->lock);
450 continue;
451 }
452 goto cmd_err;
453 }
454
455 /*
456 * A block was successfully transferred.
457 */
458 spin_lock_irq(&md->lock);
459 ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
460 spin_unlock_irq(&md->lock);
461 } while (ret);
462
463 mmc_release_host(card->host);
464
465 return 1;
466
467 cmd_err:
468 /*
469 * If this is an SD card and we're writing, we can first
470 * mark the known good sectors as ok.
471 *
472 * If the card is not SD, we can still ok written sectors
473 * as reported by the controller (which might be less than
474 * the real number of written sectors, but never more).
475 */
476 if (mmc_card_sd(card)) {
477 u32 blocks;
478
479 blocks = mmc_sd_num_wr_blocks(card);
480 if (blocks != (u32)-1) {
481 spin_lock_irq(&md->lock);
482 ret = __blk_end_request(req, 0, blocks << 9);
483 spin_unlock_irq(&md->lock);
484 }
485 } else {
486 spin_lock_irq(&md->lock);
487 ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
488 spin_unlock_irq(&md->lock);
489 }
490
491 mmc_release_host(card->host);
492
493 spin_lock_irq(&md->lock);
494 while (ret)
495 ret = __blk_end_request(req, -EIO, blk_rq_cur_bytes(req));
496 spin_unlock_irq(&md->lock);
497
498 return 0;
499 }
Lines 278 and 491 guarantee that we hold card->host exclusively while this runs.
Lines 280~360 build a fresh MMC block request (brq) out of the current struct request, and line 363 performs the actual transfer.
186 /**
187 * mmc_wait_for_req – start a request and wait for completion
188 * @host: MMC host to start command
189 * @mrq: MMC request to start
190 *
191 * Start a new MMC custom command request for a host, and wait
192 * for the command to complete. Does not attempt to parse the
193 * response.
194 */
195 void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq)
196 {
197 DECLARE_COMPLETION_ONSTACK(complete);
198
199 mrq->done_data = &complete;
200 mrq->done = mmc_wait_done;
201
202 mmc_start_request(host, mrq); // this may take a long time
203
204 wait_for_completion(&complete); // sleep until the transfer completes; the completion is signalled from the interrupt path
205 }
Lines 186~205 are the real read/write step. The completion guarantees that this function returns only after the data transfer has finished; until then it just waits (the waiting happens at line 204).
123 static void
124 mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
125 {
126 #ifdef CONFIG_MMC_DEBUG
127 unsigned int i, sz;
128 struct scatterlist *sg;
129 #endif
130
131 pr_debug("%s: starting CMD%u arg %08x flags %08xn",
132 mmc_hostname(host), mrq->cmd->opcode,
133 mrq->cmd->arg, mrq->cmd->flags);
134
135 if (mrq->data) {
136 pr_debug("%s: blksz %d blocks %d flags %08x "
137 "tsac %d ms nsac %dn",
138 mmc_hostname(host), mrq->data->blksz,
139 mrq->data->blocks, mrq->data->flags,
140 mrq->data->timeout_ns / 1000000,
141 mrq->data->timeout_clks);
142 }
143
144 if (mrq->stop) {
145 pr_debug("%s: CMD%u arg %08x flags %08xn",
146 mmc_hostname(host), mrq->stop->opcode,
147 mrq->stop->arg, mrq->stop->flags);
148 }
149
150 WARN_ON(!host->claimed);
151
152 led_trigger_event(host->led, LED_FULL);
153
154 mrq->cmd->error = 0;
155 mrq->cmd->mrq = mrq;
156 if (mrq->data) {
157 BUG_ON(mrq->data->blksz > host->max_blk_size);
158 BUG_ON(mrq->data->blocks > host->max_blk_count);
159 BUG_ON(mrq->data->blocks * mrq->data->blksz >
160 host->max_req_size);
161
162 #ifdef CONFIG_MMC_DEBUG
163 sz = 0;
164 for_each_sg(mrq->data->sg, sg, mrq->data->sg_len, i)
165 sz += sg->length;
166 BUG_ON(sz != mrq->data->blocks * mrq->data->blksz);
167 #endif
168
169 mrq->cmd->data = mrq->data;
170 mrq->data->error = 0;
171 mrq->data->mrq = mrq;
172 if (mrq->stop) {
173 mrq->data->stop = mrq->stop;
174 mrq->stop->error = 0;
175 mrq->stop->mrq = mrq;
176 }
177 }
178 host->ops->request(host, mrq); // for goldfish this calls goldfish_mmc_request
179 }
As the comment on line 178 notes, goldfish_mmc_request is called to do the real work:
400 static void goldfish_mmc_request(struct mmc_host *mmc, struct mmc_request *req)
401 {
402 struct goldfish_mmc_host *host = mmc_priv(mmc);
403
404 WARN_ON(host->mrq != NULL);
405
406 host->mrq = req;
407 goldfish_mmc_prepare_data(host, req); // set up the transfer parameters so the data is ready
408 goldfish_mmc_start_command(host, req->cmd); // issue the command; this is what moves the data
409
410 /* this is to avoid accidentally being detected as an SDIO card in mmc_attach_sdio() */
411 if (req->cmd->opcode == SD_IO_SEND_OP_COND &&
412 req->cmd->flags == (MMC_RSP_SPI_R4 | MMC_RSP_R4 | MMC_CMD_BCR)) {
413 req->cmd->error = -EINVAL;
414 }
415 }
Line 408 calls goldfish_mmc_start_command:
156 static void
157 goldfish_mmc_start_command(struct goldfish_mmc_host *host, struct mmc_command *cmd)
158 {
159 u32 cmdreg;
160 u32 resptype;
161 u32 cmdtype;
162
163 host->cmd = cmd;
164
165 resptype = 0;
166 cmdtype = 0;
167
168 /* Our hardware needs to know exact type */
169 switch (mmc_resp_type(cmd)) {
170 case MMC_RSP_NONE:
171 break;
172 case MMC_RSP_R1:
173 case MMC_RSP_R1B:
174 /* resp 1, 1b, 6, 7 */
175 resptype = 1;
176 break;
177 case MMC_RSP_R2:
178 resptype = 2;
179 break;
180 case MMC_RSP_R3:
181 resptype = 3;
182 break;
183 default:
184 dev_err(mmc_dev(host->mmc), "Invalid response type: %04xn", mmc_resp_type(cmd));
185 break;
186 }
187
188 if (mmc_cmd_type(cmd) == MMC_CMD_ADTC) {
189 cmdtype = OMAP_MMC_CMDTYPE_ADTC;
190 } else if (mmc_cmd_type(cmd) == MMC_CMD_BC) {
191 cmdtype = OMAP_MMC_CMDTYPE_BC;
192 } else if (mmc_cmd_type(cmd) == MMC_CMD_BCR) {
193 cmdtype = OMAP_MMC_CMDTYPE_BCR;
194 } else {
195 cmdtype = OMAP_MMC_CMDTYPE_AC;
196 }
197
198 cmdreg = cmd->opcode | (resptype << 8) | (cmdtype << 12);
199
200 if (host->bus_mode == MMC_BUSMODE_OPENDRAIN)
201 cmdreg |= 1 << 6;
202
203 if (cmd->flags & MMC_RSP_BUSY)
204 cmdreg |= 1 << 11;
205
206 if (host->data && !(host->data->flags & MMC_DATA_WRITE))
207 cmdreg |= 1 << 15;
208
209 GOLDFISH_MMC_WRITE(host, MMC_ARG, cmd->arg);
210 GOLDFISH_MMC_WRITE(host, MMC_CMD, cmdreg);
211 }
This step can take a while.
How do we find out that the transfer is finished? Through an interrupt. When the data transfer completes, the host raises an interrupt, and the interrupt path ends up invoking the mmc_wait_done set at line 200. The interrupt handler looks like this:
291 static irqreturn_t goldfish_mmc_irq(int irq, void *dev_id)
292 {
293 struct goldfish_mmc_host * host = (struct goldfish_mmc_host *)dev_id;
294 u16 status;
295 int end_command = 0;
296 int end_transfer = 0;
297 int transfer_error = 0;
298 int state_changed = 0;
299 int cmd_timeout = 0;
300
301 while ((status = GOLDFISH_MMC_READ(host, MMC_INT_STATUS)) != 0) {
302 GOLDFISH_MMC_WRITE(host, MMC_INT_STATUS, status);
303
304 if (status & MMC_STAT_END_OF_CMD) {
305 end_command = 1;
306 }
307
308 if (status & MMC_STAT_END_OF_DATA) {
309 end_transfer = 1;
310 }
311 if (status & MMC_STAT_STATE_CHANGE) {
312 state_changed = 1;
313 }
314
315 if (status & MMC_STAT_CMD_TIMEOUT) {
316 end_command = 0;
317 cmd_timeout = 1;
318 }
319 }
320
321 if (cmd_timeout) {
322 struct mmc_request *mrq = host->mrq;
323 mrq->cmd->error = -ETIMEDOUT;
324 host->mrq = NULL;
325 mmc_request_done(host->mmc, mrq);
326 }
327
328 if (end_command) {
329 goldfish_mmc_cmd_done(host, host->cmd);
330 }
331 if (transfer_error)
332 goldfish_mmc_xfer_done(host, host->data);
333 else if (end_transfer) {
334 host->dma_done = 1;
335 goldfish_mmc_end_of_data(host, host->data);
336 }
337 if (state_changed) {
338 u32 state = GOLDFISH_MMC_READ(host, MMC_STATE);
339 pr_info("%s: Card detect now %dn", __func__,
340 (state & MMC_STATE_INSERTED));
341 mmc_detect_change(host->mmc, 0);
342 }
343
344 if (!end_command && !end_transfer &&
345 !transfer_error && !state_changed && !cmd_timeout) {
346 status = GOLDFISH_MMC_READ(host, MMC_INT_STATUS);
347 dev_info(mmc_dev(host->mmc),"spurious irq 0x%04xn", status);
348 if (status != 0) {
349 GOLDFISH_MMC_WRITE(host, MMC_INT_STATUS, status);
350 GOLDFISH_MMC_WRITE(host, MMC_INT_ENABLE, 0);
351 }
352 }
353
354 return IRQ_HANDLED;
355 }
At lines 333~336, once the data transfer has finished, line 335 runs goldfish_mmc_end_of_data(); note that host->dma_done is set to 1, which the code below relies on.
252 static void
253 goldfish_mmc_end_of_data(struct goldfish_mmc_host *host, struct mmc_data *data)
254 {
255 if (!host->dma_in_use) {
256 goldfish_mmc_xfer_done(host, data);
257 return;
258 }
259 if (host->dma_done)
260 goldfish_mmc_xfer_done(host, data);
261 }
Since host->dma_done was set to 1 earlier, lines 259~260 execute, i.e. goldfish_mmc_xfer_done is called:
213 static void
214 goldfish_mmc_xfer_done(struct goldfish_mmc_host *host, struct mmc_data *data)
215 {
216 if (host->dma_in_use) {
217 enum dma_data_direction dma_data_dir;
218
219 if (data->flags & MMC_DATA_WRITE)
220 dma_data_dir = DMA_TO_DEVICE;
221 else
222 dma_data_dir = DMA_FROM_DEVICE;
223
224 if (dma_data_dir == DMA_FROM_DEVICE) {
225 // we don't really have DMA, so we need to copy from our platform driver buffer
226 uint8_t* dest = (uint8_t *)sg_virt(data->sg);
227 memcpy(dest, host->virt_base, data->sg->length);
228 }
229
230 host->data->bytes_xfered += data->sg->length;
231
232 dma_unmap_sg(mmc_dev(host->mmc), data->sg, host->sg_len, dma_data_dir);
233 }
234
235 host->data = NULL;
236 host->sg_len = 0;
237
238 /* NOTE: MMC layer will sometimes poll-wait CMD13 next, issuing
239 * dozens of requests until the card finishes writing data.
240 * It'd be cheaper to just wait till an EOFB interrupt arrives…
241 */
242
243 if (!data->stop) {
244 host->mrq = NULL;
245 mmc_request_done(host->mmc, data->mrq);
246 return;
247 }
248
249 goldfish_mmc_start_command(host, data->stop);
250 }
Line 245 calls mmc_request_done:
69 /**
70 * mmc_request_done – finish processing an MMC request
71 * @host: MMC host which completed request
72 * @mrq: MMC request which request
73 *
74 * MMC drivers should call this function when they have completed
75 * their processing of a request.
76 */
77 void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
78 {
79 struct mmc_command *cmd = mrq->cmd;
80 int err = cmd->error;
81
82 if (err && cmd->retries && mmc_host_is_spi(host)) {
83 if (cmd->resp[0] & R1_SPI_ILLEGAL_COMMAND)
84 cmd->retries = 0;
85 }
86
87 if (err && cmd->retries) {
88 pr_debug("%s: req failed (CMD%u): %d, retrying…n",
89 mmc_hostname(host), cmd->opcode, err);
90
91 cmd->retries--;
92 cmd->error = 0;
93 host->ops->request(host, mrq);
94 } else {
95 led_trigger_event(host->led, LED_OFF);
96
97 pr_debug("%s: req done (CMD%u): %d: %08x %08x %08x %08xn",
98 mmc_hostname(host), cmd->opcode, err,
99 cmd->resp[0], cmd->resp[1],
100 cmd->resp[2], cmd->resp[3]);
101
102 if (mrq->data) {
103 pr_debug("%s: %d bytes transferred: %dn",
104 mmc_hostname(host),
105 mrq->data->bytes_xfered, mrq->data->error);
106 }
107
108 if (mrq->stop) {
109 pr_debug("%s: (CMD%u): %d: %08x %08x %08x %08xn",
110 mmc_hostname(host), mrq->stop->opcode,
111 mrq->stop->error,
112 mrq->stop->resp[0], mrq->stop->resp[1],
113 mrq->stop->resp[2], mrq->stop->resp[3]);
114 }
115
116 if (mrq->done)
117 mrq->done(mrq);
118 }
119 }
Which finally reaches line 117 and calls mrq->done, i.e. mmc_wait_done:
181 static void mmc_wait_done(struct mmc_request *mrq)
182 {
183 complete(mrq->done_data);
184 }
The mrq->done_data at line 183 was set to &complete earlier (see mmc_wait_for_req).
4824 /**
4825 * complete: – signals a single thread waiting on this completion
4826 * @x: holds the state of this particular completion
4827 *
4828 * This will wake up a single thread waiting on this completion. Threads will be
4829 * awakened in the same order in which they were queued.
4830 *
4831 * See also complete_all(), wait_for_completion() and related routines.
4832 */
4833 void complete(struct completion *x)
4834 {
4835 unsigned long flags;
4836
4837 spin_lock_irqsave(&x->wait.lock, flags);
4838 x->done++;
4839 __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
4840 spin_unlock_irqrestore(&x->wait.lock, flags);
4841 }
4842 EXPORT_SYMBOL(complete);
Look at line 4838: after done++, the process sleeping on this completion is woken, and at that point it can finish its wait_for_completion(&complete). The wait_for_completion code is as follows:
4898 /**
4899 * wait_for_completion: – waits for completion of a task
4900 * @x: holds the state of this particular completion
4901 *
4902 * This waits to be signaled for completion of a specific task. It is NOT
4903 * interruptible and there is no timeout.
4904 *
4905 * See also similar routines (i.e. wait_for_completion_timeout()) with timeout
4906 * and interrupt capability. Also see complete().
4907 */
4908 void __sched wait_for_completion(struct completion *x)
4909 {
4910 wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
4911 }
4912 EXPORT_SYMBOL(wait_for_completion);
4887 static long __sched
4888 wait_for_common(struct completion *x, long timeout, int state)
4889 {
4890 might_sleep();
4891
4892 spin_lock_irq(&x->wait.lock);
4893 timeout = do_wait_for_common(x, timeout, state);
4894 spin_unlock_irq(&x->wait.lock);
4895 return timeout;
4896 }
4861 static inline long __sched
4862 do_wait_for_common(struct completion *x, long timeout, int state)
4863 {
4864 if (!x->done) {
4865 DECLARE_WAITQUEUE(wait, current);
4866
4867 wait.flags |= WQ_FLAG_EXCLUSIVE;
4868 __add_wait_queue_tail(&x->wait, &wait);
4869 do {
4870 if (signal_pending_state(state, current)) {
4871 timeout = -ERESTARTSYS;
4872 break;
4873 }
4874 __set_current_state(state);
4875 spin_unlock_irq(&x->wait.lock);
4876 timeout = schedule_timeout(timeout);
4877 spin_lock_irq(&x->wait.lock);
4878 } while (!x->done && timeout);
4879 __remove_wait_queue(&x->wait, &wait);
4880 if (!x->done)
4881 return timeout;
4882 }
4883 x->done–;
4884 return timeout ?: 1;
4885 }
Pay particular attention to line 4878.
With that, the analysis of the entire MMC read/write request flow is essentially complete.
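To round things off, here is a minimal, hypothetical sketch of the host-side interrupt hookup that ends the wait above: the handler checks a status register and, once the transfer has finished, signals the completion that the submitting thread is sleeping on (goldfish_mmc reaches the same point through mmc_request_done -> mrq->done). The register layout and names are assumptions.

#include <linux/interrupt.h>
#include <linux/completion.h>
#include <linux/types.h>
#include <linux/io.h>

#define SKETCH_STAT_DONE 0x01           /* hypothetical "transfer finished" status bit */

struct sketch_host {
        void __iomem      *regs;        /* mapped controller registers */
        struct completion  xfer_done;   /* what the submitting thread waits on */
};

static irqreturn_t sketch_mmc_irq(int irq, void *dev_id)
{
        struct sketch_host *host = dev_id;
        u32 status = readl(host->regs);         /* hypothetical status register at offset 0 */

        if (!(status & SKETCH_STAT_DONE))
                return IRQ_NONE;                /* not our interrupt */

        writel(status, host->regs);             /* acknowledge it */
        complete(&host->xfer_done);             /* wake whoever sits in wait_for_completion() */
        return IRQ_HANDLED;
}

/* during probe, something along the lines of:
 *     init_completion(&host->xfer_done);
 *     request_irq(irq, sketch_mmc_irq, 0, "sketch_mmc", host);
 */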