# From pagecache to bio to request
From both the read path and the write path we know that everything eventually ends up in readpages or writepages.
We will start the analysis from readpage. The write side is basically the same as what 《Linux-块设备驱动之框架详细分析(详解)》 covered,
i.e. it ends up in the ll_rw_block function; we won't dig into how it gets there.
Let's look at the readpage flow.
```c
static int read_pages(struct address_space *mapping, struct file *filp,
		struct list_head *pages, unsigned nr_pages)
{
	struct blk_plug plug;
	unsigned page_idx;
	int ret;

	blk_start_plug(&plug);

	if (mapping->a_ops->readpages) {
		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
		/* Clean up the remaining pages */
		put_pages_list(pages);
		goto out;
	}

	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
		struct page *page = list_to_page(pages);
		list_del(&page->lru);
		if (!add_to_page_cache_lru(page, mapping, page->index,
				mapping_gfp_constraint(mapping, GFP_KERNEL))) {
			mapping->a_ops->readpage(filp, page);
		}
		page_cache_release(page);
	}
	ret = 0;

out:
	blk_finish_plug(&plug);
	return ret;
}
```
Assuming the pages we want to read are not in the page cache LRU yet, we are guaranteed to go through the mapping->a_ops read callbacks (readpages, or the per-page readpage fallback).
Let's assume the filesystem is ext4.
```c
static const struct address_space_operations ext4_aops = {
	.readpage		= ext4_readpage,
	.readpages		= ext4_readpages,
	.writepage		= ext4_writepage,
	.writepages		= ext4_writepages,
	.write_begin		= ext4_write_begin,
	.write_end		= ext4_write_end,
	.bmap			= ext4_bmap,
	.invalidatepage		= ext4_invalidatepage,
	.releasepage		= ext4_releasepage,
	.direct_IO		= ext4_direct_IO,
	.migratepage		= buffer_migrate_page,
	.is_partially_uptodate	= block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
};

static int ext4_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;

	if (ext4_has_inline_data(inode))
		return 0;

	return ext4_mpage_readpages(mapping, pages, NULL, nr_pages);
}
```
ext4_readpages does very little: however many pages you ask for, it simply forwards them to ext4_mpage_readpages.
```c
int ext4_mpage_readpages(struct address_space *mapping,
			 struct list_head *pages, struct page *page,
			 unsigned nr_pages)
{
	struct bio *bio = NULL;
	unsigned page_idx;
	sector_t last_block_in_bio = 0;

	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t blocks[MAX_BUF_PER_PAGE];
	unsigned page_block;
	struct block_device *bdev = inode->i_sb->s_bdev;
	int length;
	unsigned relative_block = 0;
	struct ext4_map_blocks map;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;

	for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
		int fully_mapped = 1;
		unsigned first_hole = blocks_per_page;

		prefetchw(&page->flags);
		if (pages) {
			page = list_entry(pages->prev, struct page, lru);
			list_del(&page->lru);
			if (add_to_page_cache_lru(page, mapping, page->index,
				  mapping_gfp_constraint(mapping, GFP_KERNEL)))
				goto next_page;
		}

		if (page_has_buffers(page))
			goto confused;

		block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
		last_block = block_in_file + nr_pages * blocks_per_page;
		last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
		if (last_block > last_block_in_file)
			last_block = last_block_in_file;
		page_block = 0;

		/*
		 * Map blocks using the previous result first.
		 */
		if ((map.m_flags & EXT4_MAP_MAPPED) &&
		    block_in_file > map.m_lblk &&
		    block_in_file < (map.m_lblk + map.m_len)) {
			unsigned map_offset = block_in_file - map.m_lblk;
			unsigned last = map.m_len - map_offset;

			for (relative_block = 0; ; relative_block++) {
				if (relative_block == last) {
					/* needed? */
					map.m_flags &= ~EXT4_MAP_MAPPED;
					break;
				}
				if (page_block == blocks_per_page)
					break;
				blocks[page_block] = map.m_pblk + map_offset +
						     relative_block;
				page_block++;
				block_in_file++;
			}
		}

		/*
		 * Then do more ext4_map_blocks() calls until we are
		 * done with this page.
		 */
		while (page_block < blocks_per_page) {
			if (block_in_file < last_block) {
				map.m_lblk = block_in_file;
				map.m_len = last_block - block_in_file;

				if (ext4_map_blocks(NULL, inode, &map, 0) < 0) {
				set_error_page:
					SetPageError(page);
					zero_user_segment(page, 0,
							  PAGE_CACHE_SIZE);
					unlock_page(page);
					goto next_page;
				}
			}
			if ((map.m_flags & EXT4_MAP_MAPPED) == 0) {
				fully_mapped = 0;
				if (first_hole == blocks_per_page)
					first_hole = page_block;
				page_block++;
				block_in_file++;
				continue;
			}
			if (first_hole != blocks_per_page)
				goto confused;		/* hole -> non-hole */

			/* Contiguous blocks? */
			if (page_block && blocks[page_block-1] != map.m_pblk-1)
				goto confused;
			for (relative_block = 0; ; relative_block++) {
				if (relative_block == map.m_len) {
					/* needed? */
					map.m_flags &= ~EXT4_MAP_MAPPED;
					break;
				} else if (page_block == blocks_per_page)
					break;
				blocks[page_block] = map.m_pblk+relative_block;
				page_block++;
				block_in_file++;
			}
		}
		if (first_hole != blocks_per_page) {
			zero_user_segment(page, first_hole << blkbits,
					  PAGE_CACHE_SIZE);
			if (first_hole == 0) {
				SetPageUptodate(page);
				unlock_page(page);
				goto next_page;
			}
		} else if (fully_mapped) {
			SetPageMappedToDisk(page);
		}
		if (fully_mapped && blocks_per_page == 1 &&
		    !PageUptodate(page) && cleancache_get_page(page) == 0) {
			SetPageUptodate(page);
			goto confused;
		}

		/*
		 * This page will go to BIO.  Do we need to send this
		 * BIO off first?
		 */
		if (bio && (last_block_in_bio != blocks[0] - 1)) {
		submit_and_realloc:
			ext4_submit_bio_read(bio);
			bio = NULL;
		}
		if (bio == NULL) {
			struct ext4_crypto_ctx *ctx = NULL;

			if (ext4_encrypted_inode(inode) &&
			    S_ISREG(inode->i_mode)) {
				ctx = ext4_get_crypto_ctx(inode);
				if (IS_ERR(ctx))
					goto set_error_page;
			}
			bio = bio_alloc(GFP_KERNEL,
				min_t(int, nr_pages, BIO_MAX_PAGES));
			if (!bio) {
				if (ctx)
					ext4_release_crypto_ctx(ctx);
				goto set_error_page;
			}
			bio->bi_bdev = bdev;
			bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
			bio->bi_end_io = mpage_end_io;
			bio->bi_private = ctx;
		}

		length = first_hole << blkbits;
		if (bio_add_page(bio, page, length, 0) < length)
			goto submit_and_realloc;

		if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
		     (relative_block == map.m_len)) ||
		    (first_hole != blocks_per_page)) {
			ext4_submit_bio_read(bio);
			bio = NULL;
		} else
			last_block_in_bio = blocks[blocks_per_page - 1];
		goto next_page;
	confused:
		if (bio) {
			ext4_submit_bio_read(bio);
			bio = NULL;
		}
		if (!PageUptodate(page))
			block_read_full_page(page, ext4_get_block);
		else
			unlock_page(page);
	next_page:
		if (pages)
			page_cache_release(page);
	}
	BUG_ON(pages && !list_empty(pages));
	if (bio)
		ext4_submit_bio_read(bio);
	return 0;
}
```
To summarize the function:

1. It first initializes a few structures.
2. Then a for loop walks every page.
3. A bio structure is allocated.
4. The bio's fields are initialized.
5. The page is associated with the bio via bio_add_page.
6. Finally the bio is submitted to fetch the data via ext4_submit_bio_read.

Steps 3-6 are distilled in the sketch below.
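Stripped of ext4's block-mapping logic, the skeleton looks roughly like this. This is a minimal sketch against the same ~4.4-era APIs used above, not real ext4 code; read_one_page_sketch and my_end_io are made-up names:

```c
/* Hypothetical completion hook; a real reader would also mark pages uptodate. */
static void my_end_io(struct bio *bio)
{
	/* SetPageUptodate()/unlock_page() would happen here in a real driver */
	bio_put(bio);
}

/* Sketch: read a single page from 'bdev' starting at 512-byte sector 'sector'. */
static int read_one_page_sketch(struct block_device *bdev,
				struct page *page, sector_t sector)
{
	struct bio *bio;

	bio = bio_alloc(GFP_KERNEL, 1);		/* step 3: allocate a bio, room for 1 segment */
	if (!bio)
		return -ENOMEM;

	bio->bi_bdev = bdev;			/* step 4: initialize the bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_end_io = my_end_io;

	/* step 5: associate page and bio; bv_page now points at our page */
	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
		bio_put(bio);
		return -EIO;
	}

	submit_bio(READ, bio);			/* step 6: hand it to the block layer */
	return 0;
}
```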
Here we should pay particular attention to how the association is made:
```c
int bio_add_page(struct bio *bio, struct page *page,
		 unsigned int len, unsigned int offset)
{
	struct bio_vec *bv;

	/*
	 * cloned bio must not modify vec list
	 */
	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
		return 0;

	if (bio->bi_vcnt > 0) {
		bv = &bio->bi_io_vec[bio->bi_vcnt - 1];

		if (page == bv->bv_page &&
		    offset == bv->bv_offset + bv->bv_len) {
			bv->bv_len += len;
			goto done;
		}
	}

	if (bio->bi_vcnt >= bio->bi_max_vecs)
		return 0;

	bv		= &bio->bi_io_vec[bio->bi_vcnt];
	bv->bv_page	= page;
	bv->bv_len	= len;
	bv->bv_offset	= offset;

	bio->bi_vcnt++;
done:
	bio->bi_iter.bi_size += len;
	return len;
}
```
The association is very simple: it's just data pointers. Look at the program from the earlier 《2.Linux 块设备驱动代码编写---简单驱动》, where the handler walks the bio's segments (the excerpt was cut off mid-switch; the WRITE branch below is reconstructed from the same pattern):
```c
pRHdata = pdev->data + (bio->bi_sector * RAMHD_SECTOR_SIZE);

bio_for_each_segment(bvec, bio, i) {
	pBuffer = kmap(bvec->bv_page) + bvec->bv_offset;
	switch (bio_data_dir(bio)) {
	case READ:
		/* device -> page: copy into the page the bio carries */
		memcpy(pBuffer, pRHdata, bvec->bv_len);
		flush_dcache_page(bvec->bv_page);
		break;
	case WRITE:
		/* page -> device: the mirror direction */
		memcpy(pRHdata, pBuffer, bvec->bv_len);
		break;
	}
	pRHdata += bvec->bv_len;
	kunmap(bvec->bv_page);
}
```
As we can see, once we reach the driver, it copies data from the disk or block device into bv_page, and that pointer is exactly the page we associated earlier.
The other approach, in 《4.Linux-块设备驱动(详解)》, deals only with requests; it is really the same thing, just a different wrapping format, as the sketch below illustrates.
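In the request-based style, the driver's handler receives a struct request instead of a raw bio, but it still walks the very same bio_vec segments, just through the request wrapper. A rough sketch assuming the 4.4-era rq_for_each_segment API; ramhd_handle_req is a made-up name, and pdev->data and RAMHD_SECTOR_SIZE are borrowed from the ramdisk example above:

```c
/*
 * Sketch of the request-based equivalent: rq_for_each_segment walks
 * every bio_vec of every bio packed into the request.
 */
static void ramhd_handle_req(struct request *req, struct ramhd_dev *pdev)
{
	struct req_iterator iter;
	struct bio_vec bvec;
	char *pRHdata = pdev->data + (blk_rq_pos(req) * RAMHD_SECTOR_SIZE);

	rq_for_each_segment(bvec, req, iter) {
		char *pBuffer = kmap(bvec.bv_page) + bvec.bv_offset;

		if (rq_data_dir(req) == READ)
			memcpy(pBuffer, pRHdata, bvec.bv_len);	/* device -> page */
		else
			memcpy(pRHdata, pBuffer, bvec.bv_len);	/* page -> device */

		pRHdata += bvec.bv_len;
		kunmap(bvec.bv_page);
	}
}
```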
As for the last step, submitting the bio to fetch the data via ext4_submit_bio_read: it eventually reaches the submit_bio function, which we analyze below.
But there is a question here: a submit_bio can be issued for every page (or every contiguous run of pages), which doesn't seem right. Bios can be chained into a list, yet clearly they are not being linked together here.
As you might have guessed, they get linked together during the submit_bio process.
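Part of the answer is already visible back in read_pages: the whole loop runs under a plug. While blk_start_plug is in effect, requests built from the submitted bios sit on a per-task plug list where adjacent ones can be merged, and blk_finish_plug flushes them out in one batch. Conceptually (a sketch, not kernel source; bio1 and bio2 are placeholders):

```c
struct blk_plug plug;

blk_start_plug(&plug);		/* start batching I/O for this task */

/* submit several small bios, e.g. one per contiguous run of pages */
submit_bio(READ, bio1);
submit_bio(READ, bio2);

/*
 * Requests built from these bios were parked on the per-task plug
 * list, where neighbouring ones could be merged; finishing the plug
 * dispatches them to the device queue in one batch.
 */
blk_finish_plug(&plug);
```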
## submit_bio
```c
static void ext4_submit_bio_read(struct bio *bio)
{
	if (trace_android_fs_dataread_start_enabled()) {
		struct page *first_page = bio->bi_io_vec[0].bv_page;

		if (first_page != NULL) {
			trace_android_fs_dataread_start(
				first_page->mapping->host,
				page_offset(first_page),
				bio->bi_iter.bi_size,
				current->pid,
				current->comm);
		}
	}
	submit_bio(READ, bio);
}
```
Last time we got as far as this ext4_submit_bio_read; next comes submit_bio.
block/blk-core.c:

```c
blk_qc_t submit_bio(int rw, struct bio *bio)
{
	bio->bi_rw |= rw;

	if (bio_has_data(bio)) {
		unsigned int count;

		if (unlikely(rw & REQ_WRITE_SAME))
			count = bdev_logical_block_size(bio->bi_bdev) >> 9;
		else
			count = bio_sectors(bio);

		if (rw & WRITE) {
			count_vm_events(PGPGOUT, count);
		} else {
			task_io_account_read(bio->bi_iter.bi_size);
			count_vm_events(PGPGIN, count);
		}

		mtk_btag_pidlog_submit_bio(bio);

		if (unlikely(block_dump)) {
			char b[BDEVNAME_SIZE];
			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
				current->comm, task_pid_nr(current),
				(rw & WRITE) ? "WRITE" : "READ",
				(unsigned long long)bio->bi_iter.bi_sector,
				bdevname(bio->bi_bdev, b), count);
		}
	}

	return generic_make_request(bio);
}
```
From here it goes straight into generic_make_request:
```c
blk_qc_t generic_make_request(struct bio *bio)
{
	struct bio_list bio_list_on_stack;
	blk_qc_t ret = BLK_QC_T_NONE;

	if (!generic_make_request_checks(bio))
		goto out;

	if (current->bio_list) {
		bio_list_add(current->bio_list, bio);
		goto out;
	}

	BUG_ON(bio->bi_next);
	bio_list_init(&bio_list_on_stack);
	current->bio_list = &bio_list_on_stack;
	do {
		struct request_queue *q = bdev_get_queue(bio->bi_bdev);

		if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) {
			ret = q->make_request_fn(q, bio);

			blk_queue_exit(q);

			bio = bio_list_pop(current->bio_list);
		} else {
			struct bio *bio_next = bio_list_pop(current->bio_list);

			bio_io_error(bio);
			bio = bio_next;
		}
	} while (bio);
	current->bio_list = NULL; /* deactivate */

out:
	return ret;
}
```
The really interesting thing in here is current->bio_list, which is essentially the answer to the question raised in the previous part: while one invocation of generic_make_request is active, any bio submitted from within it is merely appended to current->bio_list and later picked up by the outer do/while loop.
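To see why current->bio_list matters, imagine a stacked driver (dm/md style) whose make_request_fn remaps the bio and resubmits it. A hedged sketch; my_stacked_make_request and lower_bdev are made-up names:

```c
static struct block_device *lower_bdev;	/* hypothetical underlying device */

/*
 * Sketch: a stacked device's make_request_fn redirects the bio and
 * calls generic_make_request() again. Because current->bio_list is
 * non-NULL at that point (set by the outer generic_make_request),
 * the redirected bio is only queued with bio_list_add() and handled
 * later by the outer do/while loop, instead of recursing deeper.
 */
static blk_qc_t my_stacked_make_request(struct request_queue *q,
					struct bio *bio)
{
	bio->bi_bdev = lower_bdev;	/* remap to the underlying device */
	return generic_make_request(bio);
}
```

This is how arbitrarily deep device stacks avoid blowing the kernel stack: resubmission turns recursion into iteration over current->bio_list.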