LLVM学习笔记(43)

栏目: 服务器 · 编程工具 · 发布时间: 5年前

内容简介:SchedTables保存在WriteProcResources,WriteLatencies,ReadAdvanceEntries以及WriterNames容器里的数据是所有处理器公用的,因此下面的方法首先输出包含这些公用数据的数组。

3.6.2.3. 输出代码与数据结构

3.6.2.3.1. 资源使用与时延

SchedTables保存在WriteProcResources,WriteLatencies,ReadAdvanceEntries以及WriterNames容器里的数据是所有处理器公用的,因此下面的方法首先输出包含这些公用数据的数组。

1070   void SubtargetEmitter::EmitSchedClassTables (SchedClassTables &SchedTables,

1071   raw_ostream &OS) {

1072   // Emit global WriteProcResTable.

1073   OS << "\n// {ProcResourceIdx, Cycles}\n"

1074   << "extern const llvm::MCWriteProcResEntry "

1075   << Target << "WriteProcResTable[] = {\n"

1076   << "  { 0,  0}, // Invalid\n";

1077   for (unsigned WPRIdx = 1, WPREnd = SchedTables.WriteProcResources.size();

1078   WPRIdx != WPREnd; ++WPRIdx) {

1079   MCWriteProcResEntry &WPREntry = SchedTables.WriteProcResources[WPRIdx];

1080   OS << "  {" << format("%2d", WPREntry.ProcResourceIdx) << ", "

1081   << format("%2d", WPREntry.Cycles) << "}";

1082   if (WPRIdx + 1 < WPREnd)

1083   OS << ',';

1084   OS << " // #" << WPRIdx << '\n';

1085   }

1086   OS << "}; // " << Target << "WriteProcResTable\n";

1087  

1088   // Emit global WriteLatencyTable.

1089   OS << "\n// {Cycles, WriteResourceID}\n"

1090   << "extern const llvm::MCWriteLatencyEntry "

1091   << Target << "WriteLatencyTable[] = {\n"

1092   << "  { 0,  0}, // Invalid\n";

1093   for (unsigned WLIdx = 1, WLEnd = SchedTables.WriteLatencies.size();

1094   WLIdx != WLEnd; ++WLIdx) {

1095   MCWriteLatencyEntry &WLEntry = SchedTables.WriteLatencies[WLIdx];

1096   OS << "  {" << format("%2d", WLEntry.Cycles) << ", "

1097   << format("%2d", WLEntry.WriteResourceID) << "}";

1098   if (WLIdx + 1 < WLEnd)

1099   OS << ',';

1100   OS << " // #" << WLIdx << " " << SchedTables.WriterNames[WLIdx] << '\n';

1101   }

1102   OS << "}; // " << Target << "WriteLatencyTable\n";

1103  

1104   // Emit global ReadAdvanceTable.

1105   OS << "\n// {UseIdx, WriteResourceID, Cycles}\n"

1106   << "extern const llvm::MCReadAdvanceEntry "

1107   << Target << "ReadAdvanceTable[] = {\n"

1108   << "  {0,  0,  0}, // Invalid\n";

1109   for (unsigned RAIdx = 1, RAEnd = SchedTables.ReadAdvanceEntries.size();

1110   RAIdx != RAEnd; ++RAIdx) {

1111   MCReadAdvanceEntry &RAEntry = SchedTables.ReadAdvanceEntries[RAIdx];

1112   OS << "  {" << RAEntry.UseIdx << ", "

1113   << format("%2d", RAEntry.WriteResourceID) << ", "

1114   << format("%2d", RAEntry.Cycles) << "}";

1115   if (RAIdx + 1 < RAEnd)

1116   OS << ',';

1117   OS << " // #" << RAIdx << '\n';

1118   }

1119   OS << "}; // " << Target << "ReadAdvanceTable\n";

1120  

1121   // Emit a SchedClass table for each processor.

1122   for (CodeGenSchedModels::ProcIter PI = SchedModels.procModelBegin(),

1123   PE = SchedModels.procModelEnd(); PI != PE; ++PI) {

1124   if (!PI->hasInstrSchedModel())

1125   continue ;

1126  

1127   std::vector<MCSchedClassDesc> &SCTab =

1128   SchedTables.ProcSchedClasses[1 + (PI - SchedModels.procModelBegin())];

1129  

1130   OS << "\n// {Name, NumMicroOps, BeginGroup, EndGroup,"

1131   << " WriteProcResIdx,#, WriteLatencyIdx,#, ReadAdvanceIdx,#}\n";

1132   OS << "static const llvm::MCSchedClassDesc "

1133   << PI->ModelName << "SchedClasses[] = {\n";

1134  

1135   // The first class is always invalid. We no way to distinguish it except by

1136       // name and position.

1137   assert (SchedModels.getSchedClass(0).Name == "NoInstrModel"

1138   && "invalid class not first");

1139   OS << "  {DBGFIELD(\"InvalidSchedClass\")  "

1140   << MCSchedClassDesc::InvalidNumMicroOps

1141   << ", 0, 0,  0, 0,  0, 0,  0, 0},\n";

1142  

1143   for (unsigned SCIdx = 1, SCEnd = SCTab.size(); SCIdx != SCEnd; ++SCIdx) {

1144   MCSchedClassDesc &MCDesc = SCTab[SCIdx];

1145   const CodeGenSchedClass &SchedClass = SchedModels.getSchedClass(SCIdx);

1146   OS << "  {DBGFIELD(\"" << SchedClass.Name << "\") ";

1147   if (SchedClass.Name.size() < 18)

1148   OS.indent(18 - SchedClass.Name.size());

1149   OS << MCDesc.NumMicroOps

1150   << ", " << MCDesc.BeginGroup << ", " << MCDesc.EndGroup

1151   << ", " << format("%2d", MCDesc.WriteProcResIdx)

1152   << ", " << MCDesc.NumWriteProcResEntries

1153   << ", " << format("%2d", MCDesc.WriteLatencyIdx)

1154   << ", " << MCDesc.NumWriteLatencyEntries

1155   << ", " << format("%2d", MCDesc.ReadAdvanceIdx)

1156   << ", " << MCDesc.NumReadAdvanceEntries << "}";

1157   if (SCIdx + 1 < SCEnd)

1158   OS << ',';

1159   OS << " // #" << SCIdx << '\n';

1160   }

1161   OS << "}; // " << PI->ModelName << "SchedClasses\n";

1162   }

1163   }

首先在1073~1086行输出X86家族处理器公用的MCWriteProcResEntry数组:

// {ProcResourceIdx, Cycles}

extern const llvm::MCWriteProcResEntry X86WriteProcResTable[] = {

{ 0,  0}, // Invalid

{18,  1}, // #1

{19,  1}, // #2

{ 4,  1}, // #3

{ 8,  1}, // #4

{ 4,  1}, // #1217

{ 6,  1}, // #1218

{ 7,  2} // #1219

}; // X86WriteProcResTable

接着1088~1102行输出X86家族目标机器公用的MCWriteLatencyEntry数组:

// {Cycles, WriteResourceID}

extern const llvm::MCWriteLatencyEntry X86WriteLatencyTable[] = {

{ 0,  0}, // Invalid

{100,  0}, // #1 WriteMicrocoded_WriteSystem

{ 1,  0}, // #2 WriteALU_WriteVecLogic_WriteZero_WriteFBlend_WriteShift_WriteJump_WriteFShuffle_WriteStore_WriteMove_WriteLEA_WriteFence_WriteShuffle_WriteVecALU_WriteVecShift_WriteNop_WriteBlend_Write2P237_P4_WritePushF_WritePushA_WritePopF_WritePopA_WriteP06_WriteBSwap32_WriteBSwap64_WriteMoveBE32rm_WriteMoveBE16mr_WriteMoveBE32mr_WriteMoveBE64mr_Write2P0156_2P237_P4_Write3P0156_2P237_P4_WriteP0156_2P237_P4_WriteShiftRMW_WriteShiftClLdRMW_Write2P06_WriteRotateRMW_WriteRotateRMWCL_WriteRCm1_WriteRCmi_WriteShDmr_WriteShDmrCL_WriteBTmr_WriteBTRSCmr_WriteSetCCm_WriteCldStd_WriteP15_WriteJCXZ_WriteLOOP_WriteCALLr_WriteCALLm_WriteRET_WriteRETI_WriteBOUND_WriteINTO_Write2P0156_P23_WriteP0156_P23_WriteSTOS_WriteXADD_WriteCMPXCHG_WriteCMPXCHG8B_WriteCMPXCHG16B_WritePAUSE_WriteXGETBV_WriteRDTSC_WriteRDPMC_WriteRDRAND_WriteST_FP80m_WriteFBSTP_WriteFNSTSW_WriteFNSTCW_WriteFNSAVE_WriteFRSTOR_WriteP1_P23_Write2P1_P23_Write5P0156_WriteFNINIT_WriteP5_WriteP01_P5_WritePBLENDWr_WriteVPBLENDDr_WritePEXTRm_WriteVPGATHERDD128_WriteVPGATHERDD256_WriteVPGATHERQD128_WriteVPGATHERQD256_WriteVPGATHERDQ128_WriteVPGATHERDQ256_WriteVPGATHERQQ128_WriteVPGATHERQQ256_WriteEXTRACTPSr_WriteVGATHERDPS128_WriteVGATHERDPS256_WriteVGATHERQPS128_WriteVGATHERQPS256_WriteVGATHERDPD128_WriteVGATHERDPD256_WriteVGATHERQPD128_WriteVGATHERQPD256_WriteVZEROUPPER_WriteVZEROALL_WriteFShuffle256_WriteShuffle256_WriteVarVecShift_WriteVarBlend_WriteFVarBlend

{ 5,  0}, // #3 WriteALULd_WriteVecLogicLd_WriteFBlendLd_WriteShiftLd_WriteJumpLd_WriteFShuffleLd_WriteVecALULd_WriteShuffleLd_WriteVecIMul_WriteVecShiftLd_WriteFMul_WriteBlendLd_WriteFRcp_WriteFRsqrt_WriteVPBROADCAST128Ld_WritePCMPGTQr_WritePCMPGTQm_WriteCVTPD2PSYrr_WriteCVTPS2PDrm_WriteVCVTPS2PDYrr_WriteCVTSS2SDrm_WriteHADDSUBPr_WriteMULr_WriteFMADDr_WriteRSQRTr_WriteP5Ld_WriteLoad_WriteShuffle256Ld_WriteFShuffle256Ld_WriteVarVecShiftLd

{ 1,  0}, // #4 WriteRMW

{ 3,  0}, // #54 WriteIMul

{ 1,  0}, // #55 WriteIMulH

{17,  0} // #56 WritePCmpEStrMLd_WritePCmpIStrILd

}; // X86WriteLatencyTable

旁边输出的注释是具有相同时延数据的SchedWrite定义(具体资源的使用则记录在调度类的MCSchedClassDesc对象里,GenSchedClassTables的938~940行)。另外,WriteResourceID域都是0,表明X86目标机器使用了适用于所有SchedWrite定义的ReadAdvance或SchedReadAdvance定义,或者没有使用ReadAdvance及SchedReadAdvance定义(因为随后输出了X86ReadAdvanceTable,因此显然是前者)。

1104~1119行输出X86家族目标机器公用的MCReadAdvanceEntry数组:

// {UseIdx, WriteResourceID, Cycles}

extern const llvm::MCReadAdvanceEntry X86ReadAdvanceTable[] = {

{0,  0,  0}, // Invalid

{0,  0,  4}, // #1

{5,  0,  4}, // #2

{6,  0,  4}, // #3

{0,  0,  3}, // #4

{5,  0,  3}, // #5

{6,  0,  3} // #6

}; // X86ReadAdvanceTable

因为WriteResourceID域都是0,因此X86目标机器使用了适用于所有SchedWrite定义的ReadAdvance或SchedReadAdvance定义。另外,UseIdx是读操作数的索引,实际上是X86目标机器只定义了ReadAdvance<ReadAfterLd, 4>与ReadAdvance<ReadAfterLd, 3>,但由于ReadAfterLd在指令定义中可作为第0、5及6个读操作数出现,所以构建出上面的数组。

接下来的代码开始输出描述处理器调度类型的数组。

对X86目标机器,这些数组有:HaswellModelSchedClasses,BtVer2ModelSchedClasses,SandyBridgeModelSchedClasses,及SLMModelSchedClasses。其中SandyBridgeModelSchedClasses相关的数组是这样的:

// {Name, NumMicroOps, BeginGroup, EndGroup, WriteProcResIdx,#, WriteLatencyIdx,#, ReadAdvanceIdx,#}

static const llvm::MCSchedClassDesc SandyBridgeModelSchedClasses[] = {

{DBGFIELD("InvalidSchedClass")  65535, 0, 0,  0, 0,  0, 0,  0, 0},

{DBGFIELD("IIC_AAA_WriteMicrocoded") 1, 0, 0, 108, 2,  1, 1,  0, 0}, // #1

{DBGFIELD("IIC_AAD_WriteMicrocoded") 1, 0, 0, 108, 2,  1, 1,  0, 0}, // #2

{DBGFIELD("IIC_AAM_WriteMicrocoded") 1, 0, 0, 108, 2,  1, 1,  0, 0}, // #3

{DBGFIELD("IIC_AAS_WriteMicrocoded") 1, 0, 0, 108, 2,  1, 1,  0, 0}, // #4

{DBGFIELD("VZEROALL")           0, 0, 0,  0, 0,  0, 0,  0, 0}, // #947

{DBGFIELD("LDMXCSR_VLDMXCSR")   1, 0, 0, 279, 2,  9, 1,  0, 0}, // #948

{DBGFIELD("STMXCSR_VSTMXCSR")   1, 0, 0, 439, 3,  2, 1,  0, 0} // #949

}; // SandyBridgeModelSchedClasses

DBGFIELD声明的域用于调试目的。所有的BeginGroup与EndGroup域都是0(false),表示这些调度类没有组成调度组(LLVM目前没有任何调度组)。剩下的(Idx,number)组给出了对前面生成数组的引用情况。以LDMXCSR_VLDMXCSR为例,它援引X86WriteLatencyTable的第10项:{4,  0}——这个SchedWrite有4周期时延,以及X86WriteProcResTable的第280、281项:{8,  1}, {10,  1}——占用编号为8及10的资源1周期。

3.6.2.3.2. 处理器资源模型

有了所有处理器调度类型的数组后,就该轮到输出描述处理器的数据结构了。同样,类似Atom的处理器与类似SandyBridge的处理器的处理方法是不一样的,因为在.td文件的处理器描述上,它们就有很大的区别。

1165   void SubtargetEmitter::EmitProcessorModels (raw_ostream &OS) {

1166   // For each processor model.

1167   for (CodeGenSchedModels::ProcIter PI = SchedModels.procModelBegin(),

1168   PE = SchedModels.procModelEnd(); PI != PE; ++PI) {

1169   // Emit processor resource table.

1170   if (PI->hasInstrSchedModel())

1171   EmitProcessorResources (*PI, OS);

1172   else if(!PI->ProcResourceDefs.empty())

1173   PrintFatalError(PI->ModelDef->getLoc(), "SchedMachineModel defines "

1174   "ProcResources without defining WriteRes SchedWriteRes");

1175  

1176   // Begin processor itinerary properties

1177   OS << "\n";

1178   OS << "static const llvm::MCSchedModel " << PI->ModelName << " = {\n";

1179   (OS, PI->ModelDef, "IssueWidth", ',');

1180   EmitProcessorProp(OS, PI->ModelDef, "MicroOpBufferSize", ',');

1181   EmitProcessorProp(OS, PI->ModelDef, "LoopMicroOpBufferSize", ',');

1182   EmitProcessorProp(OS, PI->ModelDef, "LoadLatency", ',');

1183   EmitProcessorProp(OS, PI->ModelDef, "HighLatency", ',');

1184   EmitProcessorProp(OS, PI->ModelDef, "MispredictPenalty", ',');

1185  

1186   OS << "  " << (bool)(PI->ModelDef ?

1187   PI->ModelDef->getValueAsBit("PostRAScheduler") : 0)

1188   << ", // " << "PostRAScheduler\n";

1189  

1190   OS << "  " << (bool)(PI->ModelDef ?

1191   PI->ModelDef->getValueAsBit("CompleteModel") : 0)

1192   << ", // " << "CompleteModel\n";

1193  

1194   OS << "  " << PI->Index << ", // Processor ID\n";

1195   if (PI->hasInstrSchedModel())

1196   OS << "  " << PI->ModelName << "ProcResources" << ",\n"

1197   << "  " << PI->ModelName << "SchedClasses" << ",\n"

1198   << "  " << PI->ProcResourceDefs.size()+1 << ",\n"

1199   << "  " << (SchedModels.schedClassEnd()

1200   - SchedModels.schedClassBegin()) << ",\n";

1201   else

1202   OS << "  0, 0, 0, 0, // No instruction-level machine model.\n";

1203   if (PI->hasItineraries())

1204   OS << "  " << PI->ItinsDef->getName() << "};\n";

1205   else

1206   OS << "  nullptr}; // No Itinerary\n";

1207   }

1208   }

对类似SandyBridge的处理器,首先调用下面的EmitProcessorResources方法输出描述资源的数据结构。因为这样的处理器需要另外描述资源,而类似Atom的处理器则是在ProcessorItineraries派生定义里给出资源描述。

605      void SubtargetEmitter::EmitProcessorResources( const CodeGenProcModel &ProcModel,

606      raw_ostream &OS) {

607      char Sep = ProcModel.ProcResourceDefs.empty() ? ' ' : ',';

608     

609      OS << "\n// {Name, NumUnits, SuperIdx, IsBuffered}\n";

610      OS << "static const llvm::MCProcResourceDesc "

611      << ProcModel.ModelName << "ProcResources" << "[] = {\n"

612      << "  {DBGFIELD(\"InvalidUnit\")     0, 0, 0}" << Sep << "\n";

613     

614      for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i) {

615      Record *PRDef = ProcModel.ProcResourceDefs[i];

616     

617      Record *SuperDef = nullptr;

618      unsigned SuperIdx = 0;

619      unsigned NumUnits = 0;

620      int BufferSize = PRDef->getValueAsInt("BufferSize");

621      if (PRDef->isSubClassOf("ProcResGroup")) {

622      RecVec ResUnits = PRDef->getValueAsListOfDefs("Resources");

623      for (RecIter RUI = ResUnits.begin(), RUE = ResUnits.end();

624      RUI != RUE; ++RUI) {

625      NumUnits += (*RUI)->getValueAsInt("NumUnits");

626      }

627      }

628      else {

629      // Find the SuperIdx

630      if (PRDef->getValueInit("Super")->isComplete()) {

631      SuperDef = SchedModels.findProcResUnits(

632      PRDef->getValueAsDef("Super"), ProcModel);

633      SuperIdx = ProcModel.getProcResourceIdx(SuperDef);

634      }

635      NumUnits = PRDef->getValueAsInt("NumUnits");

636      }

637      // Emit the ProcResourceDesc

638      if (i+1 == e)

639      Sep = ' ';

640      OS << "  {DBGFIELD(\"" << PRDef->getName() << "\") ";

641      if (PRDef->getName().size() < 15)

642      OS.indent(15 - PRDef->getName().size());

643      OS << NumUnits << ", " << SuperIdx << ", "

644      << BufferSize << "}" << Sep << " // #" << i+1;

645      if (SuperDef)

646      OS << ", Super=" << SuperDef->getName();

647      OS << "\n";

648      }

649      OS << "};\n";

650      }

每个类似SandyBridge的处理器都要输出一个资源数组,描述SandyBridge处理器资源的数组是:

// {Name, NumUnits, SuperIdx, IsBuffered}

static const llvm::MCProcResourceDesc SandyBridgeModelProcResources [] = {

{DBGFIELD("InvalidUnit")     0, 0, 0},

{DBGFIELD("SBDivider")       1, 0, -1}, // #1

{DBGFIELD("SBPort0")         1, 0, -1}, // #2

{DBGFIELD("SBPort1")         1, 0, -1}, // #3

{DBGFIELD("SBPort4")         1, 0, -1}, // #4

{DBGFIELD("SBPort5")         1, 0, -1}, // #5

{DBGFIELD("SBPort05")        2, 0, -1}, // #6

{DBGFIELD("SBPort15")        2, 0, -1}, // #7

{DBGFIELD("SBPort23")        2, 0, -1}, // #8

{DBGFIELD("SBPort015")       3, 0, -1}, // #9

{DBGFIELD("SBPortAny")       6, 0, 54} // #10

};

MCProcResourceDesc的定义与ProcResourceUnits类似。其中SuperIdx为0,表示不存在上级资源。注释里称为IsBuffered的域,实际上是BufferSize,-1表示发布口由统一的保留站(即SandyBridge处理器的硬件调度器)来供给。最后一行的54,表示SandyBridge处理器的硬件调度器最多可以缓存54条等待发布的微操作,这正是Sandy Bridge保留站的深度(reservation station,保留站的作用是排队微操作,直到所有的源操作数就绪,将就绪的微操作调度并分发到可用的执行单元)。

前面X86WriteProcResTable数组元素ProcResourceIdx成员的内容就是这个数组的索引。

方法EmitProcessorProp则对所有的处理器都适用。它辅助生成处理器的MCSchedModel数组。注意,对所有的目标机器家族,描述的第一个处理器总是NoSchedModel。

594      void SubtargetEmitter::EmitProcessorProp (raw_ostream &OS, const Record *R,

595      const char *Name, char Separator) {

596      OS << "  ";

597      int V = R ? R->getValueAsInt(Name) : -1;

598      if (V >= 0)

599      OS << V << Separator << " // " << Name;

600      else

601      OS << "MCSchedModel::Default" << Name << Separator;

602      OS << '\n';

603      }

参考在TargetSchedule.td中定义的,缺省的LoadLatency,MicroOpBufferSize,MinLatency,LoopMicroOpBufferSize,IssueWidth,HighLatency,MispredictPenalty都是-1,在输出数组时,这些缺省值被输出为MCSchedModel::DefaultXXX。因此,对X86目标机器,NoSchedModel的输出是这样的:

static const llvm::MCSchedModel NoSchedModel = {

MCSchedModel::DefaultIssueWidth,

MCSchedModel::DefaultMicroOpBufferSize,

MCSchedModel::DefaultLoopMicroOpBufferSize,

MCSchedModel::DefaultLoadLatency,

MCSchedModel::DefaultHighLatency,

MCSchedModel::DefaultMispredictPenalty,

0, // PostRAScheduler

1, // CompleteModel

0, // Processor ID

0, 0, 0, 0, // No instruction-level machine model.

nullptr}; // No Itinerary

在X86家族里,指令调度得到良好描述的只有基于Atom,BtVer2,SLM,Haswell,SandyBridge架构的处理器,其他像i386,i686,pentium4m,k6,athlon系列,opteron系列等处理器LLVM并没有给出有关指令调度的细节(估计是找不到相关的文档)。对这些处理器,LLVM给出了一个通用的描述(当然也没有什么战力☺)——GenericModel,它的输出是这样的:

static const llvm::MCSchedModel GenericModel = {

4, // IssueWidth

32, // MicroOpBufferSize

MCSchedModel::DefaultLoopMicroOpBufferSize,

4, // LoadLatency

10, // HighLatency

MCSchedModel::DefaultMispredictPenalty,

0, // PostRAScheduler

1, // CompleteModel

1, // Processor ID

0, 0, 0, 0, // No instruction-level machine model.

nullptr}; // No Itinerary

对于我们前面看过的Atom与SandyBridge处理器,它们的输出则是:

static const llvm::MCSchedModel AtomModel = {

2, // IssueWidth

0, // MicroOpBufferSize

10, // LoopMicroOpBufferSize

3, // LoadLatency

30, // HighLatency

MCSchedModel::DefaultMispredictPenalty,

1, // PostRAScheduler

1, // CompleteModel

2, // Processor ID

0, 0, 0, 0, // No instruction-level machine model.

AtomItineraries};

static const llvm::MCSchedModel SandyBridgeModel = {

4, // IssueWidth

168, // MicroOpBufferSize

28, // LoopMicroOpBufferSize

4, // LoadLatency

MCSchedModel::DefaultHighLatency,

16, // MispredictPenalty

0, // PostRAScheduler

0, // CompleteModel

5, // Processor ID

SandyBridgeModelProcResources ,

SandyBridgeModelSchedClasses ,

11,

950,

nullptr}; // No Itinerary

对Atom处理器,MCSchedModel实例的ProcResourceTable(const MCProcResourceDesc*类型),SchedClassTable(const MCSchedClassDesc*类型),NumProcResourceKinds与NumSchedClasses都是0。而SandyBridge则指向前面生成的数组。

接下来,我们还要生成一个查找表,根据处理器的名字,给出对应的MCSchedModel实例。

1213   void SubtargetEmitter::EmitProcessorLookup (raw_ostream &OS) {

1214   // Gather and sort processor information

1215   std::vector<Record*> ProcessorList =

1216   Records.getAllDerivedDefinitions("Processor");

1217   std::sort(ProcessorList.begin(), ProcessorList.end(), LessRecordFieldName());

1218  

1219   // Begin processor table

1220   OS << "\n";

1221   OS << "// Sorted (by key) array of itineraries for CPU subtype.\n"

1222   << "extern const llvm::SubtargetInfoKV "

1223   << Target << "ProcSchedKV[] = {\n";

1224  

1225   // For each processor

1226   for (unsigned i = 0, N = ProcessorList.size(); i < N;) {

1227   // Next processor

1228   Record *Processor = ProcessorList[i];

1229  

1230   const std::string &Name = Processor->getValueAsString("Name");

1231   const std::string &ProcModelName =

1232   SchedModels.getModelForProc(Processor).ModelName;

1233  

1234   // Emit as { "cpu", procinit },

1235   OS << "  { \"" << Name << "\", (const void *)&" << ProcModelName << " }";

1236  

1237   // Depending on ''if more in the list'' emit comma

1238   if (++i < N) OS << ",";

1239  

1240   OS << "\n";

1241   }

1242  

1243   // End processor table

1244   OS << "};\n";

1245   }

这个查找表的类型是SubtargetInfoKV:

69        struct SubtargetInfoKV {

70        const char *Key;                      // K-V key string

71        const void *Value;                    // K-V pointer value

72       

73        // Compare routine for std::lower_bound

74        bool operator <(StringRef S) const {

75        return StringRef(Key) < S;

76        }

77        };

X86家族的这张表不算太大,下面给出它完整的定义:

// Sorted (by key) array of itineraries for CPU subtype.

extern const llvm::SubtargetInfoKV X86ProcSchedKV[] = {

{ "amdfam10", ( const void *)&GenericModel },

{ "athlon", ( const void *)&GenericModel },

{ "athlon-4", ( const void *)&GenericModel },

{ "athlon-fx", ( const void *)&GenericModel },

{ "athlon-mp", ( const void *)&GenericModel },

{ "athlon-tbird", ( const void *)&GenericModel },

{ "athlon-xp", ( const void *)&GenericModel },

{ "athlon64", ( const void *)&GenericModel },

{ "athlon64-sse3", ( const void *)&GenericModel },

{ "atom", ( const void *)&AtomModel },

{ "barcelona", ( const void *)&GenericModel },

{ "bdver1", ( const void *)&GenericModel },

{ "bdver2", ( const void *)&GenericModel },

{ "bdver3", ( const void *)&GenericModel },

{ "bdver4", ( const void *)&GenericModel },

{ "bonnell", ( const void *)&AtomModel },

{ "broadwell", ( const void *)&HaswellModel },

{ "btver1", ( const void *)&GenericModel },

{ "btver2", ( const void *)&BtVer2Model },

{ "c3", ( const void *)&GenericModel },

{ "c3-2", ( const void *)&GenericModel },

{ "core-avx-i", ( const void *)&SandyBridgeModel },

{ "core-avx2", ( const void *)&HaswellModel },

{ "core2", ( const void *)&SandyBridgeModel },

{ "corei7", ( const void *)&SandyBridgeModel },

{ "corei7-avx", ( const void *)&SandyBridgeModel },

{ "generic", ( const void *)&GenericModel },

{ "geode", ( const void *)&GenericModel },

{ "haswell", ( const void *)&HaswellModel },

{ "i386", ( const void *)&GenericModel },

{ "i486", ( const void *)&GenericModel },

{ "i586", ( const void *)&GenericModel },

{ "i686", ( const void *)&GenericModel },

{ "ivybridge", ( const void *)&SandyBridgeModel },

{ "k6", ( const void *)&GenericModel },

{ "k6-2", ( const void *)&GenericModel },

{ "k6-3", ( const void *)&GenericModel },

{ "k8", ( const void *)&GenericModel },

{ "k8-sse3", ( const void *)&GenericModel },

{ "knl", ( const void *)&HaswellModel },

{ "nehalem", ( const void *)&SandyBridgeModel },

{ "nocona", ( const void *)&GenericModel },

{ "opteron", ( const void *)&GenericModel },

{ "opteron-sse3", ( const void *)&GenericModel },

{ "penryn", ( const void *)&SandyBridgeModel },

{ "pentium", ( const void *)&GenericModel },

{ "pentium-m", ( const void *)&GenericModel },

{ "pentium-mmx", ( const void *)&GenericModel },

{ "pentium2", ( const void *)&GenericModel },

{ "pentium3", ( const void *)&GenericModel },

{ "pentium3m", ( const void *)&GenericModel },

{ "pentium4", ( const void *)&GenericModel },

{ "pentium4m", ( const void *)&GenericModel },

{ "pentiumpro", ( const void *)&GenericModel },

{ "prescott", ( const void *)&GenericModel },

{ "sandybridge", ( const void *)&SandyBridgeModel },

{ "silvermont", ( const void *)&SLMModel },

{ "skx", ( const void *)&HaswellModel },

{ "skylake", ( const void *)&HaswellModel },

{ "slm", ( const void *)&SLMModel },

{ "westmere", ( const void *)&SandyBridgeModel },

{ "winchip-c6", ( const void *)&GenericModel },

{ "winchip2", ( const void *)&GenericModel },

{ "x86-64", ( const void *)&SandyBridgeModel },

{ "yonah", ( const void *)&SandyBridgeModel }

};

因此,为了得到尽可能好的性能,应该通过命令行选项告诉LLVM目标机器是什么处理器。

回到SubtargetEmitter::run,下面的代码输出一个重要的方法:InitX86MCSubtargetInfo。

SubtargetEmitter::run(续)

1437   // MCInstrInfo initialization routine.

1438   OS << "static inline void Init" << Target

1439   << "MCSubtargetInfo(MCSubtargetInfo *II, "

1440   << "const Triple &TT, StringRef CPU, StringRef FS) {\n";

1441   OS << "  II->InitMCSubtargetInfo(TT, CPU, FS, ";

1442   if (NumFeatures)

1443   OS << Target << "FeatureKV, ";

1444   else

1445   OS << "None, ";

1446   if (NumProcs)

1447   OS << Target << "SubTypeKV, ";

1448   else

1449   OS << "None, ";

1450   OS << '\n'; OS.indent(22);

1451   OS << Target << "ProcSchedKV, "

1452   << Target << "WriteProcResTable, "

1453   << Target << "WriteLatencyTable, "

1454   << Target << "ReadAdvanceTable, ";

1455   if (SchedModels.hasItineraries()) {

1456   OS << '\n'; OS.indent(22);

1457   OS << Target << "Stages, "

1458   << Target << "OperandCycles, "

1459   << Target << "ForwardingPaths";

1460   } else

1461   OS << "0, 0, 0";

1462   OS << ");\n}\n\n";

1463  

1464   OS << "} // End llvm namespace \n";

1465  

1466   OS << "#endif // GET_SUBTARGETINFO_MC_DESC\n\n";

生成的InitX86MCSubtargetInfo方法的定义如下(连带收尾代码):

#undef DBGFIELD

static inline void InitX86MCSubtargetInfo (MCSubtargetInfo *II, const Triple &TT, StringRef CPU, StringRef FS) {

II->InitMCSubtargetInfo(TT, CPU, FS, X86FeatureKV, X86SubTypeKV,

X86ProcSchedKV, X86WriteProcResTable, X86WriteLatencyTable, X86ReadAdvanceTable,

X86Stages, X86OperandCycles, X86ForwardingPaths);

}

} // End llvm namespace

#endif // GET_SUBTARGETINFO_MC_DESC

通过这个方法,X86目标机器的数据就与机器无关的MC框架挂上钩。


以上所述就是小编给大家介绍的《LLVM学习笔记(43)》,希望对大家有所帮助,如果大家有任何疑问请给我留言,小编会及时回复大家的。在此也非常感谢大家对 码农网 的支持!

查看所有标签

猜你喜欢:

本站部分资源来源于网络,本站转载出于传递更多信息之目的,版权归原作者或者来源机构所有,如转载稿涉及版权问题,请联系我们

编程珠玑(续)(修订版)

编程珠玑(续)(修订版)

【美】Jon Bentley 乔恩•本特利 / 钱丽艳、刘田 / 人民邮电出版社 / 2015-2 / CNY 35.00

历史上最伟大的计算机科学著作之一 融深邃思想、实战技术与趣味轶事于一炉的奇书 带你真正领略计算机科学之美 多年以来,当程序员们推选出最心爱的计算机图书时,《编程珠玑》总是位于前列。正如自然界里珍珠出自细沙对牡蛎的磨砺,计算机科学大师Jon Bentley以其独有的洞察力和创造力,从磨砺程序员的实际问题中凝结出一篇篇不朽的编程“珠玑”,成为世界计算机界名刊《ACM通讯》历史上最受欢......一起来看看 《编程珠玑(续)(修订版)》 这本书的介绍吧!

在线进制转换器
在线进制转换器

各进制数互转换器

随机密码生成器
随机密码生成器

多种字符组合密码

MD5 加密
MD5 加密

MD5 加密工具